# Routing Performance Metrics Implementation Design ## Overview This document details the implementation of routing performance metrics in the Order Management System (OMS), which tracks and analyzes the performance of different execution venues to optimize routing decisions and provide insights into system performance. ## Metrics Architecture The routing performance metrics system consists of several components: 1. **Metrics Collection**: Real-time collection of performance data from execution venues 2. **Metrics Storage**: Efficient storage of metrics data for analysis 3. **Metrics Aggregation**: Calculation of aggregated metrics for reporting 4. **Metrics Analysis**: Analysis of metrics to identify trends and issues 5. **Metrics Reporting**: Provision of metrics to monitoring systems and dashboards ## Metrics Models ### Base Metrics Interface ```csharp /// /// Base interface for all metrics /// public interface IMetric { /// /// Unique identifier for this metric /// string Id { get; } /// /// Name of this metric /// string Name { get; } /// /// Description of this metric /// string Description { get; } /// /// When this metric was created /// DateTime CreatedAt { get; } /// /// When this metric was last updated /// DateTime UpdatedAt { get; set; } /// /// Tags associated with this metric /// Dictionary Tags { get; } } ``` ### Routing Metrics Model ```csharp /// /// Routing performance metrics /// public record RoutingMetrics : IMetric { public string Id { get; set; } = Guid.NewGuid().ToString(); public string Name { get; set; } = "Routing Metrics"; public string Description { get; set; } = "Performance metrics for order routing"; public DateTime CreatedAt { get; set; } = DateTime.UtcNow; public DateTime UpdatedAt { get; set; } = DateTime.UtcNow; public Dictionary Tags { get; set; } = new Dictionary(); // Overall routing statistics public int TotalRoutedOrders { get; set; } public int SuccessfulRoutedOrders { get; set; } public int FailedRoutedOrders { get; set; 
} public double SuccessRate => TotalRoutedOrders > 0 ? (double)SuccessfulRoutedOrders / TotalRoutedOrders : 0; // Performance metrics public double AverageRoutingTimeMs { get; set; } public double MedianRoutingTimeMs { get; set; } public double P95RoutingTimeMs { get; set; } public double P99RoutingTimeMs { get; set; } // Venue-specific metrics public Dictionary VenuePerformance { get; set; } = new Dictionary(); // Time-based metrics public Dictionary TimeBasedPerformance { get; set; } = new Dictionary(); // Algorithm-specific metrics public Dictionary AlgorithmPerformance { get; set; } = new Dictionary(); // Symbol-specific metrics public Dictionary SymbolPerformance { get; set; } = new Dictionary(); } ``` ### Venue Metrics Model ```csharp /// /// Metrics for a specific execution venue /// public record VenueMetrics : IMetric { public string Id { get; set; } public string Name { get; set; } public string Description { get; set; } = "Venue performance metrics"; public DateTime CreatedAt { get; set; } = DateTime.UtcNow; public DateTime UpdatedAt { get; set; } = DateTime.UtcNow; public Dictionary Tags { get; set; } = new Dictionary(); // Basic venue information public string VenueId { get; set; } public string VenueName { get; set; } public VenueType VenueType { get; set; } // Order statistics public int TotalOrders { get; set; } public int SuccessfulOrders { get; set; } public int FailedOrders { get; set; } public int CancelledOrders { get; set; } public int ExpiredOrders { get; set; } public double FillRate => TotalOrders > 0 ? 
(double)SuccessfulOrders / TotalOrders : 0; // Performance metrics public double AverageLatencyMs { get; set; } public double MedianLatencyMs { get; set; } public double P95LatencyMs { get; set; } public double P99LatencyMs { get; set; } // Execution quality metrics public double AverageSlippage { get; set; } public double MedianSlippage { get; set; } public double P95Slippage { get; set; } public double P99Slippage { get; set; } // Value metrics public decimal TotalValueRouted { get; set; } public decimal TotalCommissionPaid { get; set; } public decimal AverageOrderValue { get; set; } // Time-based performance public Dictionary HourlyPerformance { get; set; } = new Dictionary(); // Order size distribution public Dictionary OrderSizeDistribution { get; set; } = new Dictionary(); // Error statistics public Dictionary ErrorCounts { get; set; } = new Dictionary(); } ``` ### Time-Based Metrics Model ```csharp /// /// Metrics for a specific time period /// public record TimeBasedMetrics : IMetric { public string Id { get; set; } public string Name { get; set; } public string Description { get; set; } = "Time-based performance metrics"; public DateTime CreatedAt { get; set; } = DateTime.UtcNow; public DateTime UpdatedAt { get; set; } = DateTime.UtcNow; public Dictionary Tags { get; set; } = new Dictionary(); // Time period information public DateTime PeriodStart { get; set; } public DateTime PeriodEnd { get; set; } public string PeriodType { get; set; } // Hourly, Daily, Weekly, Monthly // Performance metrics for this period public int OrdersRouted { get; set; } public int SuccessfulOrders { get; set; } public double AverageRoutingTimeMs { get; set; } public double AverageSlippage { get; set; } public decimal TotalValueRouted { get; set; } } ``` ### Algorithm Metrics Model ```csharp /// /// Metrics for algorithmic order execution /// public record AlgorithmMetrics : IMetric { public string Id { get; set; } public string Name { get; set; } public string Description { get; 
set; } = "Algorithm performance metrics"; public DateTime CreatedAt { get; set; } = DateTime.UtcNow; public DateTime UpdatedAt { get; set; } = DateTime.UtcNow; public Dictionary Tags { get; set; } = new Dictionary(); // Algorithm information public string AlgorithmType { get; set; } // TWAP, VWAP, Iceberg // Performance metrics public int TotalOrders { get; set; } public int CompletedOrders { get; set; } public double CompletionRate => TotalOrders > 0 ? (double)CompletedOrders / TotalOrders : 0; // Execution quality public double AverageTrackingError { get; set; } // For TWAP/VWAP public double AverageIcebergDetectionRate { get; set; } // For Iceberg orders public double AverageParticipationRate { get; set; } // For algorithmic orders // Time metrics public TimeSpan AverageExecutionDuration { get; set; } public TimeSpan MedianExecutionDuration { get; set; } // Venue distribution public Dictionary VenueDistribution { get; set; } = new Dictionary(); } ``` ### Symbol Metrics Model ```csharp /// /// Metrics for a specific trading symbol /// public record SymbolMetrics : IMetric { public string Id { get; set; } public string Name { get; set; } public string Description { get; set; } = "Symbol performance metrics"; public DateTime CreatedAt { get; set; } = DateTime.UtcNow; public DateTime UpdatedAt { get; set; } = DateTime.UtcNow; public Dictionary Tags { get; set; } = new Dictionary(); // Symbol information public string Symbol { get; set; } public string AssetClass { get; set; } // Trading metrics public int TotalOrders { get; set; } public int SuccessfulOrders { get; set; } public double FillRate => TotalOrders > 0 ? 
(double)SuccessfulOrders / TotalOrders : 0; // Price metrics public double AverageSpread { get; set; } public double MedianSpread { get; set; } public double Volatility { get; set; } // Volume metrics public long TotalVolume { get; set; } public double AverageOrderSize { get; set; } // Venue performance for this symbol public Dictionary VenuePerformance { get; set; } = new Dictionary(); } ``` ## Metrics Collection System ### Metrics Collector ```csharp /// /// Collects and manages routing performance metrics /// public class RoutingMetricsCollector { private readonly ILogger _logger; private readonly IMetricsRepository _metricsRepository; private readonly RoutingMetrics _currentMetrics; private readonly object _lock = new object(); private readonly Timer _metricsFlushTimer; public RoutingMetricsCollector( ILogger logger, IMetricsRepository metricsRepository) { _logger = logger ?? throw new ArgumentNullException(nameof(logger)); _metricsRepository = metricsRepository ?? throw new ArgumentNullException(nameof(metricsRepository)); _currentMetrics = new RoutingMetrics { Id = "routing-metrics-current", Name = "Current Routing Metrics", Description = "Current performance metrics for order routing", Tags = new Dictionary { ["type"] = "current" } }; // Initialize metrics from repository InitializeMetricsAsync().Wait(); // Set up periodic metrics flush _metricsFlushTimer = new Timer(FlushMetricsAsync, null, TimeSpan.FromMinutes(5), TimeSpan.FromMinutes(5)); } /// /// Record metrics for a routed order /// public void RecordOrderRouting(OrderRequest request, RoutingResult result, TimeSpan routingTime) { if (request == null) throw new ArgumentNullException(nameof(request)); if (result == null) throw new ArgumentNullException(nameof(result)); lock (_lock) { // Update overall routing metrics _currentMetrics.TotalRoutedOrders++; if (result.Success) { _currentMetrics.SuccessfulRoutedOrders++; } else { _currentMetrics.FailedRoutedOrders++; } // Update routing time metrics 
UpdateRoutingTimeMetrics(routingTime.TotalMilliseconds); // Update venue-specific metrics if (result.SelectedVenue != null) { UpdateVenueMetrics(request, result, routingTime); } // Update algorithm-specific metrics if (!string.IsNullOrEmpty(request.Algorithm)) { UpdateAlgorithmMetrics(request, result, routingTime); } // Update symbol-specific metrics UpdateSymbolMetrics(request, result, routingTime); _currentMetrics.UpdatedAt = DateTime.UtcNow; } } /// /// Record metrics for an executed order /// public void RecordOrderExecution(string venueId, VenueOrderResult result, TimeSpan executionTime, decimal slippage) { if (result == null) throw new ArgumentNullException(nameof(result)); if (string.IsNullOrEmpty(venueId)) throw new ArgumentException("Venue ID required", nameof(venueId)); lock (_lock) { // Update venue metrics with execution data if (_currentMetrics.VenuePerformance.ContainsKey(venueId)) { var venueMetrics = _currentMetrics.VenuePerformance[venueId]; // Update order counts venueMetrics.TotalOrders++; if (result.Success) { venueMetrics.SuccessfulOrders++; } else { venueMetrics.FailedOrders++; } // Update latency metrics UpdateLatencyMetrics(venueMetrics, executionTime.TotalMilliseconds); // Update slippage metrics UpdateSlippageMetrics(venueMetrics, (double)slippage); // Update value metrics if (result.Status != null) { var orderValue = result.Status.Quantity * (result.Status.LimitPrice ?? result.Status.Fills?.FirstOrDefault()?.FillPrice ?? 0); venueMetrics.TotalValueRouted += orderValue; venueMetrics.AverageOrderValue = venueMetrics.TotalOrders > 0 ? 
venueMetrics.TotalValueRouted / venueMetrics.TotalOrders : 0; } venueMetrics.UpdatedAt = DateTime.UtcNow; } } } private void UpdateRoutingTimeMetrics(double routingTimeMs) { // Simple incremental average calculation var currentAvg = _currentMetrics.AverageRoutingTimeMs; var count = _currentMetrics.TotalRoutedOrders; _currentMetrics.AverageRoutingTimeMs = ((currentAvg * (count - 1)) + routingTimeMs) / count; // In a production system, you would also track median, P95, P99 using proper statistical methods // For now, we'll just update these with simple approximations if (routingTimeMs > _currentMetrics.P99RoutingTimeMs) { _currentMetrics.P99RoutingTimeMs = routingTimeMs; } } private void UpdateVenueMetrics(OrderRequest request, RoutingResult result, TimeSpan routingTime) { var venueId = result.SelectedVenue.Id; if (!_currentMetrics.VenuePerformance.ContainsKey(venueId)) { _currentMetrics.VenuePerformance[venueId] = new VenueMetrics { Id = $"venue-metrics-{venueId}", Name = $"Metrics for {result.SelectedVenue.Name}", VenueId = venueId, VenueName = result.SelectedVenue.Name, VenueType = result.SelectedVenue.Type, Tags = new Dictionary { ["venue_id"] = venueId } }; } var venueMetrics = _currentMetrics.VenuePerformance[venueId]; venueMetrics.TotalOrders++; if (result.Success) { venueMetrics.SuccessfulOrders++; } else { venueMetrics.FailedOrders++; } // Update latency metrics UpdateLatencyMetrics(venueMetrics, routingTime.TotalMilliseconds); venueMetrics.UpdatedAt = DateTime.UtcNow; } private void UpdateLatencyMetrics(VenueMetrics venueMetrics, double latencyMs) { // Simple incremental average calculation var currentAvg = venueMetrics.AverageLatencyMs; var count = venueMetrics.TotalOrders; venueMetrics.AverageLatencyMs = ((currentAvg * (count - 1)) + latencyMs) / count; // Update order size distribution // This is a simplified approach - in reality, you would have more sophisticated bucketing var latencyBucket = GetLatencyBucket(latencyMs); if 
(venueMetrics.OrderSizeDistribution.ContainsKey(latencyBucket)) { venueMetrics.OrderSizeDistribution[latencyBucket]++; } else { venueMetrics.OrderSizeDistribution[latencyBucket] = 1; } } private void UpdateSlippageMetrics(VenueMetrics venueMetrics, double slippage) { // Simple incremental average calculation var currentAvg = venueMetrics.AverageSlippage; var count = venueMetrics.TotalOrders; venueMetrics.AverageSlippage = ((currentAvg * (count - 1)) + slippage) / count; } private string GetLatencyBucket(double latencyMs) { return latencyMs switch { <= 10 => "0-10ms", <= 50 => "11-50ms", <= 100 => "51-100ms", <= 500 => "101-500ms", <= 1000 => "501-1000ms", _ => "1000ms+" }; } private void UpdateAlgorithmMetrics(OrderRequest request, RoutingResult result, TimeSpan routingTime) { var algorithm = request.Algorithm; if (!_currentMetrics.AlgorithmPerformance.ContainsKey(algorithm)) { _currentMetrics.AlgorithmPerformance[algorithm] = new AlgorithmMetrics { Id = $"algorithm-metrics-{algorithm}", Name = $"Metrics for {algorithm}", AlgorithmType = algorithm, Tags = new Dictionary { ["algorithm"] = algorithm } }; } var algorithmMetrics = _currentMetrics.AlgorithmPerformance[algorithm]; algorithmMetrics.TotalOrders++; if (result.Success) { algorithmMetrics.CompletedOrders++; } // Update timing metrics algorithmMetrics.AverageExecutionDuration = TimeSpan.FromMilliseconds( (algorithmMetrics.AverageExecutionDuration.TotalMilliseconds * (algorithmMetrics.TotalOrders - 1) + routingTime.TotalMilliseconds) / algorithmMetrics.TotalOrders); // Update venue distribution if (result.SelectedVenue != null) { var venueId = result.SelectedVenue.Id; if (algorithmMetrics.VenueDistribution.ContainsKey(venueId)) { algorithmMetrics.VenueDistribution[venueId]++; } else { algorithmMetrics.VenueDistribution[venueId] = 1; } } algorithmMetrics.UpdatedAt = DateTime.UtcNow; } private void UpdateSymbolMetrics(OrderRequest request, RoutingResult result, TimeSpan routingTime) { var symbol = request.Symbol; 
if (!_currentMetrics.SymbolPerformance.ContainsKey(symbol)) { _currentMetrics.SymbolPerformance[symbol] = new SymbolMetrics { Id = $"symbol-metrics-{symbol}", Name = $"Metrics for {symbol}", Symbol = symbol, Tags = new Dictionary { ["symbol"] = symbol } }; } var symbolMetrics = _currentMetrics.SymbolPerformance[symbol]; symbolMetrics.TotalOrders++; if (result.Success) { symbolMetrics.SuccessfulOrders++; } // Update venue performance for this symbol if (result.SelectedVenue != null) { var venueId = result.SelectedVenue.Id; if (!symbolMetrics.VenuePerformance.ContainsKey(venueId)) { symbolMetrics.VenuePerformance[venueId] = new VenueMetrics { Id = $"symbol-venue-metrics-{symbol}-{venueId}", Name = $"Venue metrics for {symbol} at {result.SelectedVenue.Name}", VenueId = venueId, VenueName = result.SelectedVenue.Name, VenueType = result.SelectedVenue.Type, Tags = new Dictionary { ["symbol"] = symbol, ["venue_id"] = venueId } }; } var venueMetrics = symbolMetrics.VenuePerformance[venueId]; venueMetrics.TotalOrders++; if (result.Success) { venueMetrics.SuccessfulOrders++; } venueMetrics.UpdatedAt = DateTime.UtcNow; } symbolMetrics.UpdatedAt = DateTime.UtcNow; } /// /// Get current routing metrics /// public RoutingMetrics GetCurrentMetrics() { lock (_lock) { return new RoutingMetrics { Id = _currentMetrics.Id, Name = _currentMetrics.Name, Description = _currentMetrics.Description, CreatedAt = _currentMetrics.CreatedAt, UpdatedAt = _currentMetrics.UpdatedAt, Tags = new Dictionary(_currentMetrics.Tags), TotalRoutedOrders = _currentMetrics.TotalRoutedOrders, SuccessfulRoutedOrders = _currentMetrics.SuccessfulRoutedOrders, FailedRoutedOrders = _currentMetrics.FailedRoutedOrders, AverageRoutingTimeMs = _currentMetrics.AverageRoutingTimeMs, MedianRoutingTimeMs = _currentMetrics.MedianRoutingTimeMs, P95RoutingTimeMs = _currentMetrics.P95RoutingTimeMs, P99RoutingTimeMs = _currentMetrics.P99RoutingTimeMs, VenuePerformance = new Dictionary(_currentMetrics.VenuePerformance), 
TimeBasedPerformance = new Dictionary(_currentMetrics.TimeBasedPerformance),
                AlgorithmPerformance = new Dictionary(_currentMetrics.AlgorithmPerformance),
                SymbolPerformance = new Dictionary(_currentMetrics.SymbolPerformance)
            };
        }
    }

    /// <summary>
    /// Get venue-specific metrics
    /// </summary>
    /// <param name="venueId">Identifier of the venue to look up.</param>
    /// <returns>
    /// A shallow copy of the venue's metrics, or <c>null</c> when no metrics have been
    /// recorded for <paramref name="venueId"/>.
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="venueId"/> is null or empty.</exception>
    public VenueMetrics GetVenueMetrics(string venueId)
    {
        if (string.IsNullOrEmpty(venueId))
            throw new ArgumentException("Venue ID required", nameof(venueId));

        lock (_lock)
        {
            // The record's synthesized copy constructor is protected, so it cannot be
            // called from here; `with { }` is the supported way to clone a record.
            // The clone is shallow: dictionary-valued properties still reference the
            // same dictionary instances as the live metrics object.
            return _currentMetrics.VenuePerformance.TryGetValue(venueId, out var venueMetrics)
                ? venueMetrics with { }
                : null;
        }
    }

    /// <summary>
    /// Initialize metrics from repository
    /// </summary>
    /// <remarks>
    /// Failures are logged and swallowed, leaving the collector with whatever state it
    /// had at the point of failure.
    /// </remarks>
    private async Task InitializeMetricsAsync()
    {
        try
        {
            // The constructor blocks on this task with .Wait(); ConfigureAwait(false)
            // keeps the continuation from needing the blocked caller's context.
            var savedMetrics = await _metricsRepository
                .GetMetricsAsync<RoutingMetrics>("routing-metrics-current")
                .ConfigureAwait(false);

            if (savedMetrics != null)
            {
                lock (_lock)
                {
                    // Copy relevant fields from saved metrics
                    _currentMetrics.TotalRoutedOrders = savedMetrics.TotalRoutedOrders;
                    _currentMetrics.SuccessfulRoutedOrders = savedMetrics.SuccessfulRoutedOrders;
                    _currentMetrics.FailedRoutedOrders = savedMetrics.FailedRoutedOrders;
                    _currentMetrics.AverageRoutingTimeMs = savedMetrics.AverageRoutingTimeMs;

                    // A persisted snapshot may carry no venue dictionary; fall back to an
                    // empty one instead of failing half-way through the restore.
                    _currentMetrics.VenuePerformance = savedMetrics.VenuePerformance != null
                        ? new Dictionary<string, VenueMetrics>(savedMetrics.VenuePerformance)
                        : new Dictionary<string, VenueMetrics>();

                    // Note: We don't restore time-based or algorithm metrics to keep memory usage reasonable
                }

                _logger.LogInformation("Routing metrics initialized from repository");
            }
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error initializing routing metrics from repository");
        }
    }

    /// <summary>
    /// Flush metrics to repository
    /// </summary>
    /// <param name="state">Timer state object; not used.</param>
    /// <remarks>
    /// This is the timer callback, so it must return <c>void</c>. Every exception is
    /// caught and logged here because nothing can observe an exception escaping an
    /// <c>async void</c> method.
    /// </remarks>
    private async void FlushMetricsAsync(object state)
    {
        try
        {
            RoutingMetrics metricsToSave;

            lock (_lock)
            {
                // `with { }` replaces the inaccessible (protected) record copy constructor.
                // The snapshot is shallow: its dictionaries are shared with _currentMetrics.
                metricsToSave = _currentMetrics with { };
            }

            await _metricsRepository.SaveMetricsAsync(metricsToSave);

            _logger.LogInformation("Routing metrics flushed to repository");
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error flushing routing metrics to repository");
        }
    }

    /// <summary>
    /// Reset metrics
    /// </summary>
    public void ResetMetrics()
    {
        lock (_lock)
        {
_currentMetrics.TotalRoutedOrders = 0;
            _currentMetrics.SuccessfulRoutedOrders = 0;
            _currentMetrics.FailedRoutedOrders = 0;
            _currentMetrics.AverageRoutingTimeMs = 0;
            _currentMetrics.MedianRoutingTimeMs = 0;
            _currentMetrics.P95RoutingTimeMs = 0;
            _currentMetrics.P99RoutingTimeMs = 0;
            _currentMetrics.VenuePerformance.Clear();
            _currentMetrics.TimeBasedPerformance.Clear();
            _currentMetrics.AlgorithmPerformance.Clear();
            _currentMetrics.SymbolPerformance.Clear();
            _currentMetrics.UpdatedAt = DateTime.UtcNow;
        }

        _logger.LogInformation("Routing metrics reset");
    }

    public void Dispose()
    {
        _metricsFlushTimer?.Dispose();
    }
}
```

### Metrics Repository Interface

```csharp
/// <summary>
/// Repository for metrics storage and retrieval
/// </summary>
public interface IMetricsRepository
{
    /// <summary>
    /// Get metrics by ID
    /// </summary>
    Task<T> GetMetricsAsync<T>(string metricsId) where T : class, IMetric;

    /// <summary>
    /// Save metrics
    /// </summary>
    Task SaveMetricsAsync<T>(T metrics) where T : class, IMetric;

    /// <summary>
    /// Delete metrics
    /// </summary>
    Task DeleteMetricsAsync(string metricsId);

    /// <summary>
    /// Get metrics by time range
    /// </summary>
    Task<List<T>> GetMetricsByTimeRangeAsync<T>(DateTime startTime, DateTime endTime) where T : class, IMetric;

    /// <summary>
    /// Get metrics by tags
    /// </summary>
    Task<List<T>> GetMetricsByTagsAsync<T>(Dictionary<string, string> tags) where T : class, IMetric;
}
```

### In-Memory Metrics Repository

```csharp
/// <summary>
/// In-memory implementation of metrics repository for development and testing
/// </summary>
/// <remarks>
/// Metrics are stored by reference, keyed by <see cref="IMetric.Id"/>; every access to
/// the backing dictionary happens under a private lock.
/// </remarks>
public class InMemoryMetricsRepository : IMetricsRepository
{
    private readonly Dictionary<string, IMetric> _metrics;
    private readonly ILogger<InMemoryMetricsRepository> _logger;
    private readonly object _lock = new object();

    public InMemoryMetricsRepository(ILogger<InMemoryMetricsRepository> logger)
    {
        _metrics = new Dictionary<string, IMetric>();
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Get metrics by ID
    /// </summary>
    /// <returns>
    /// The stored metric, or <c>null</c> when the ID is unknown or the stored metric is
    /// not a <typeparamref name="T"/>.
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="metricsId"/> is null or empty.</exception>
    public Task<T> GetMetricsAsync<T>(string metricsId) where T : class, IMetric
    {
        if (string.IsNullOrEmpty(metricsId))
            throw new ArgumentException("Metrics ID required", nameof(metricsId));

        lock (_lock)
        {
            // Single lookup; `stored` stays null when the key is missing.
            _metrics.TryGetValue(metricsId, out var stored);
            return Task.FromResult(stored as T);
        }
    }

    /// <summary>
    /// Save metrics, replacing any metric already stored under the same ID
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="metrics"/> is null.</exception>
    /// <exception cref="ArgumentException">The metric's ID is null or empty.</exception>
    public Task SaveMetricsAsync<T>(T metrics) where T : class, IMetric
    {
        if (metrics == null)
            throw new ArgumentNullException(nameof(metrics));
        if (string.IsNullOrEmpty(metrics.Id))
            throw new ArgumentException("Metrics ID required", nameof(metrics));

        lock (_lock)
        {
            _metrics[metrics.Id] = metrics;
        }

        _logger.LogDebug("Metrics saved: {MetricsId}", metrics.Id);
        return Task.CompletedTask;
    }

    /// <summary>
    /// Delete metrics; deleting an unknown ID is a no-op
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="metricsId"/> is null or empty.</exception>
    public Task DeleteMetricsAsync(string metricsId)
    {
        if (string.IsNullOrEmpty(metricsId))
            throw new ArgumentException("Metrics ID required", nameof(metricsId));

        lock (_lock)
        {
            // Remove already tolerates a missing key, so no ContainsKey pre-check is needed.
            _metrics.Remove(metricsId);
        }

        _logger.LogDebug("Metrics deleted: {MetricsId}", metricsId);
        return Task.CompletedTask;
    }

    /// <summary>
    /// Get metrics of type <typeparamref name="T"/> whose <c>UpdatedAt</c> lies within
    /// the inclusive range [<paramref name="startTime"/>, <paramref name="endTime"/>]
    /// </summary>
    public Task<List<T>> GetMetricsByTimeRangeAsync<T>(DateTime startTime, DateTime endTime) where T : class, IMetric
    {
        lock (_lock)
        {
            var result = _metrics.Values
                .OfType<T>()
                .Where(m => m.UpdatedAt >= startTime && m.UpdatedAt <= endTime)
                .ToList();

            return Task.FromResult(result);
        }
    }

    /// <summary>
    /// Get metrics of type <typeparamref name="T"/> that carry every requested tag with
    /// exactly the requested value
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="tags"/> is null.</exception>
    public Task<List<T>> GetMetricsByTagsAsync<T>(Dictionary<string, string> tags) where T : class, IMetric
    {
        if (tags == null)
            throw new ArgumentNullException(nameof(tags));

        lock (_lock)
        {
            var result = _metrics.Values
                .OfType<T>()
                // A metric without a tag dictionary can only satisfy an empty tag filter.
                .Where(m => tags.All(tag =>
                    m.Tags != null &&
                    m.Tags.TryGetValue(tag.Key, out var value) &&
                    value == tag.Value))
                .ToList();

            return Task.FromResult(result);
        }
    }
}
```

## Integration with OrderManager

### Metrics Integration in OrderManager

```csharp
public partial class OrderManager : IOrderManager
{
    private readonly RoutingMetricsCollector _metricsCollector;

    // Enhanced constructor with metrics collector
    public OrderManager(
        IRiskManager riskManager,
        IPositionSizer positionSizer,
        ILogger logger,
        RoutingConfigurationManager configManager,
        RoutingMetricsCollector metricsCollector)
        : base(riskManager, positionSizer, logger, configManager)
    {
        _metricsCollector = metricsCollector ??
throw new ArgumentNullException(nameof(metricsCollector)); _venueManager = new VenueManager(logger); _omsToVenueOrderIdMap = new Dictionary(); _venueToOmsOrderIdMap = new Dictionary(); // Initialize with configurations InitializeWithConfigurationsAsync().Wait(); } // Enhanced routing with metrics collection public async Task RouteOrderAsync(OrderRequest request, StrategyContext context) { var startTime = DateTime.UtcNow; try { var result = await RouteOrderInternalAsync(request, context); var endTime = DateTime.UtcNow; var routingTime = endTime - startTime; // Record routing metrics _metricsCollector.RecordOrderRouting(request, result, routingTime); return result; } catch (Exception ex) { var endTime = DateTime.UtcNow; var routingTime = endTime - startTime; // Record failed routing metrics _metricsCollector.RecordOrderRouting(request, new RoutingResult(false, null, null, ex.Message, new Dictionary { ["error"] = ex.Message }), routingTime); throw; } } private async Task RouteOrderInternalAsync(OrderRequest request, StrategyContext context) { // Existing routing logic here... 
// This is the same as the previous implementation return await base.RouteOrderAsync(request, context); } // Enhanced order submission with execution metrics public async Task SubmitOrderAsync(OrderRequest request, StrategyContext context) { // Validate request parameters if (!request.IsValid(out var errors)) { return new OrderResult(false, null, string.Join("; ", errors), null); } // Validate through risk management var riskDecision = await ValidateOrderAsync(request, context); if (!riskDecision.Allow) { _logger.LogWarning("Order rejected by risk management: {Reason}", riskDecision.RejectReason); return new OrderResult(false, null, $"Risk validation failed: {riskDecision.RejectReason}", null); } var startTime = DateTime.UtcNow; try { // Route order to appropriate venue var routingResult = await RouteOrderAsync(request, context); if (!routingResult.Success) { _logger.LogError("Order routing failed: {Message}", routingResult.Message); return new OrderResult(false, null, routingResult.Message, null); } // Submit to selected venue var venueOrderRequest = ConvertToVenueOrderRequest(request); var venueResult = await routingResult.SelectedVenue.SubmitOrderAsync(venueOrderRequest); var endTime = DateTime.UtcNow; var executionTime = endTime - startTime; // Record execution metrics if (venueResult != null) { // Calculate slippage (simplified) var slippage = CalculateSlippage(request, venueResult); _metricsCollector.RecordOrderExecution(routingResult.SelectedVenue.Id, venueResult, executionTime, slippage); } if (venueResult?.Success == true) { // Map order IDs lock (_lock) { _omsToVenueOrderIdMap[venueResult.VenueOrderId] = venueResult.VenueOrderId; _venueToOmsOrderIdMap[venueResult.VenueOrderId] = venueResult.VenueOrderId; } // Create order status var orderStatus = ConvertToOrderStatus(venueResult.Status, request); // Store order status lock (_lock) { _orders[venueResult.VenueOrderId] = orderStatus; // Using venue order ID as key } _logger.LogInformation("Order {OrderId} 
submitted to venue {Venue}", venueResult.VenueOrderId, routingResult.SelectedVenue.Name);

                return new OrderResult(true, venueResult.VenueOrderId, "Order submitted successfully", orderStatus);
            }
            else
            {
                _logger.LogError("Order submission failed at venue {Venue}: {Message}", routingResult.SelectedVenue.Name, venueResult?.Message ?? "Unknown error");
                return new OrderResult(false, null, $"Venue submission failed: {venueResult?.Message ?? "Unknown error"}", null);
            }
        }
        catch (Exception ex)
        {
            var endTime = DateTime.UtcNow;
            var executionTime = endTime - startTime;

            _logger.LogError(ex, "Error submitting order for {Symbol}", request.Symbol);

            // Record failed execution metrics
            // NOTE(review): RoutingMetricsCollector.RecordOrderExecution only updates venues
            // that already have an entry in VenuePerformance, so this "unknown" venue id is
            // dropped unless such an entry exists — confirm whether that is intended.
            _metricsCollector.RecordOrderExecution("unknown", new VenueOrderResult(false, null, ex.Message, null, new Dictionary { ["error"] = ex.Message }), executionTime, 0);

            return new OrderResult(false, null, $"Error submitting order: {ex.Message}", null);
        }
    }

    /// <summary>
    /// Simplified slippage calculation: the absolute percentage difference between the
    /// request's limit price and the unweighted average fill price.
    /// </summary>
    /// <returns>
    /// The slippage in percent, or 0 when the request has no limit price, the venue
    /// result has no fills, or the limit price is zero.
    /// </returns>
    private decimal CalculateSlippage(OrderRequest request, VenueOrderResult venueResult)
    {
        // In a real implementation, this would compare expected vs actual execution prices
        if (!request.LimitPrice.HasValue || venueResult.Status?.Fills?.Any() != true)
        {
            return 0;
        }

        var expectedPrice = request.LimitPrice.Value;
        if (expectedPrice == 0)
        {
            // Percentage slippage is undefined against a zero reference price.
            return 0;
        }

        var averageFillPrice = venueResult.Status.Fills.Average(f => f.FillPrice);

        // Calculate percentage slippage
        return Math.Abs((averageFillPrice - expectedPrice) / expectedPrice) * 100;
    }

    // Enhanced methods to expose metrics
    public RoutingMetrics GetRoutingMetrics()
    {
        return _metricsCollector.GetCurrentMetrics();
    }

    public VenueMetrics GetVenueMetrics(string venueId)
    {
        return _metricsCollector.GetVenueMetrics(venueId);
    }
}
```

## Metrics Analysis and Alerting

### Metrics Analyzer

```csharp
/// <summary>
/// Analyzes routing metrics to identify trends and issues
/// </summary>
public class RoutingMetricsAnalyzer
{
    // Alert thresholds. Comparisons against them are strict (> or <).
    private const int MinRoutedOrdersForAnalysis = 100;
    private const double MinRoutingSuccessRate = 0.95;
    private const double MaxAverageRoutingTimeMs = 1000;
    private const int MinVenueOrdersForAnalysis = 50;
    private const double MinVenueFillRate = 0.90;
    private const double MaxVenueAverageLatencyMs = 500;

    private readonly ILogger<RoutingMetricsAnalyzer> _logger;
    private readonly RoutingMetricsCollector _metricsCollector;

    // Alerts raised by the most recent AnalyzeMetrics call; used to report only new ones.
    private readonly List<MetricsAlert> _activeAlerts;

    public RoutingMetricsAnalyzer(
        ILogger<RoutingMetricsAnalyzer> logger,
        RoutingMetricsCollector metricsCollector)
    {
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _metricsCollector = metricsCollector ?? throw new ArgumentNullException(nameof(metricsCollector));
        _activeAlerts = new List<MetricsAlert>();
    }

    /// <summary>
    /// Analyze current metrics and generate alerts if needed
    /// </summary>
    /// <returns>
    /// The alerts that are new since the previous call. An alert counts as already active
    /// when the previous call raised one with the same type, severity and venue.
    /// </returns>
    public List<MetricsAlert> AnalyzeMetrics()
    {
        var alerts = new List<MetricsAlert>();
        var currentMetrics = _metricsCollector.GetCurrentMetrics();

        // Check overall routing success rate (only once we have enough data)
        if (currentMetrics.TotalRoutedOrders > MinRoutedOrdersForAnalysis &&
            currentMetrics.SuccessRate < MinRoutingSuccessRate)
        {
            alerts.Add(new MetricsAlert
            {
                Id = Guid.NewGuid().ToString(),
                AlertType = "LOW_SUCCESS_RATE",
                Severity = AlertSeverity.High,
                Message = $"Routing success rate is low: {currentMetrics.SuccessRate:P2}",
                Timestamp = DateTime.UtcNow,
                Metrics = new Dictionary<string, object>
                {
                    ["success_rate"] = currentMetrics.SuccessRate,
                    ["total_orders"] = currentMetrics.TotalRoutedOrders
                }
            });
        }

        // Check average routing time
        if (currentMetrics.AverageRoutingTimeMs > MaxAverageRoutingTimeMs)
        {
            alerts.Add(new MetricsAlert
            {
                Id = Guid.NewGuid().ToString(),
                AlertType = "HIGH_LATENCY",
                Severity = AlertSeverity.Medium,
                Message = $"Average routing time is high: {currentMetrics.AverageRoutingTimeMs:F2}ms",
                Timestamp = DateTime.UtcNow,
                Metrics = new Dictionary<string, object>
                {
                    ["average_routing_time"] = currentMetrics.AverageRoutingTimeMs
                }
            });
        }

        // Check venue-specific metrics
        foreach (var kvp in currentMetrics.VenuePerformance)
        {
            var venueId = kvp.Key;
            var venueMetrics = kvp.Value;

            // Check venue fill rate (only once we have enough data)
            if (venueMetrics.TotalOrders > MinVenueOrdersForAnalysis &&
                venueMetrics.FillRate < MinVenueFillRate)
            {
                alerts.Add(new MetricsAlert
                {
                    Id = Guid.NewGuid().ToString(),
                    AlertType = "LOW_VENUE_FILL_RATE",
                    Severity = AlertSeverity.Medium,
                    Message = $"Venue {venueMetrics.VenueName} fill rate is low: {venueMetrics.FillRate:P2}",
                    Timestamp = DateTime.UtcNow,
                    Metrics = new Dictionary<string, object>
                    {
                        ["venue_id"] = venueId,
                        ["venue_name"] = venueMetrics.VenueName,
                        ["fill_rate"] = venueMetrics.FillRate,
                        ["total_orders"] = venueMetrics.TotalOrders
                    }
                });
            }

            // Check venue latency
            if (venueMetrics.AverageLatencyMs > MaxVenueAverageLatencyMs)
            {
                alerts.Add(new MetricsAlert
                {
                    Id = Guid.NewGuid().ToString(),
                    AlertType = "HIGH_VENUE_LATENCY",
                    Severity = AlertSeverity.Low,
                    Message = $"Venue {venueMetrics.VenueName} latency is high: {venueMetrics.AverageLatencyMs:F2}ms",
                    Timestamp = DateTime.UtcNow,
                    Metrics = new Dictionary<string, object>
                    {
                        ["venue_id"] = venueId,
                        ["venue_name"] = venueMetrics.VenueName,
                        ["average_latency"] = venueMetrics.AverageLatencyMs
                    }
                });
            }
        }

        // Check for new alerts. The key includes the venue so that an alert already
        // active for one venue does not hide the same alert type raised for another.
        var activeKeys = new HashSet<string>(_activeAlerts.Select(GetAlertKey));
        var newAlerts = alerts.Where(a => !activeKeys.Contains(GetAlertKey(a))).ToList();

        // Update active alerts
        _activeAlerts.Clear();
        _activeAlerts.AddRange(alerts);

        if (newAlerts.Any())
        {
            _logger.LogInformation("Generated {Count} new metrics alerts", newAlerts.Count);
        }

        return newAlerts;
    }

    /// <summary>
    /// Builds the identity used to decide whether two alerts describe the same condition:
    /// alert type, severity and, for venue alerts, the "venue_id" entry of the alert's metrics.
    /// </summary>
    private static string GetAlertKey(MetricsAlert alert)
    {
        object venueId = null;
        alert.Metrics?.TryGetValue("venue_id", out venueId);

        return $"{alert.AlertType}|{alert.Severity}|{venueId}";
    }
}

/// <summary>
/// Represents a metrics alert
/// </summary>
public record MetricsAlert
{
    public string Id { get; set; }
    public string AlertType { get; set; }
    public AlertSeverity Severity { get; set; }
    public string Message { get; set; }
    public DateTime Timestamp { get; set; }
    public Dictionary<string, object> Metrics { get; set; } = new Dictionary<string, object>();
}

/// <summary>
/// Alert severity levels
/// </summary>
public enum AlertSeverity
{
    Low,
    Medium,
    High,
    Critical
}
```

## Testing Considerations

### Unit Tests for Metrics System

1. **Metrics Collection**: Test collection of different types of metrics
2. **Metrics Aggregation**: Test aggregation of metrics over time
3. **Metrics Storage**: Test persistence and retrieval of metrics
4. **Metrics Analysis**: Test analysis and alerting based on metrics
5. **Performance Impact**: Test that metrics collection doesn't significantly impact performance

### Integration Tests

1. 
**End-to-End Metrics**: Test complete metrics flow from order routing to reporting 2. **Metrics Repository**: Test different repository implementations 3. **Metrics Analysis**: Test alert generation based on different metric thresholds 4. **Metrics Reset**: Test metrics reset functionality ## Performance Considerations ### Metrics Sampling ```csharp /// /// Controls sampling of metrics to reduce performance impact /// public class MetricsSampler { private readonly double _samplingRate; private readonly Random _random; public MetricsSampler(double samplingRate = 1.0) // 1.0 = 100% sampling { if (samplingRate < 0 || samplingRate > 1) throw new ArgumentOutOfRangeException(nameof(samplingRate), "Sampling rate must be between 0 and 1"); _samplingRate = samplingRate; _random = new Random(); } public bool ShouldSample() { return _samplingRate >= 1.0 || _random.NextDouble() < _samplingRate; } } ``` ### Metrics Batching ```csharp /// /// Batches metrics updates to reduce storage overhead /// public class MetricsBatcher { private readonly List _batch; private readonly int _batchSize; private readonly IMetricsRepository _repository; private readonly object _lock = new object(); public MetricsBatcher(IMetricsRepository repository, int batchSize = 10) { _batch = new List(); _batchSize = batchSize; _repository = repository ?? 
/// <summary>
/// Batches metrics updates to reduce storage overhead.
/// Metrics are buffered in memory and written to the repository once
/// the configured batch size is reached, or when <see cref="FlushAsync"/> is called.
/// </summary>
public class MetricsBatcher
{
    private readonly List<IMetric> _batch;
    private readonly int _batchSize;
    private readonly IMetricsRepository _repository;
    private readonly object _lock = new object();

    /// <summary>
    /// Creates a batcher that writes to <paramref name="repository"/>.
    /// </summary>
    /// <param name="repository">Repository that receives the batched metrics.</param>
    /// <param name="batchSize">Number of buffered metrics that triggers a write. Defaults to 10.</param>
    /// <exception cref="ArgumentNullException"><paramref name="repository"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="batchSize"/> is zero or negative.</exception>
    public MetricsBatcher(IMetricsRepository repository, int batchSize = 10)
    {
        _repository = repository ?? throw new ArgumentNullException(nameof(repository));

        if (batchSize <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(batchSize), "Batch size must be greater than zero");
        }

        _batchSize = batchSize;
        _batch = new List<IMetric>(batchSize);
    }

    /// <summary>
    /// Adds a metric to the current batch and, if the batch is now full, saves the whole batch.
    /// </summary>
    /// <param name="metric">Metric to buffer.</param>
    /// <exception cref="ArgumentNullException"><paramref name="metric"/> is null.</exception>
    public async Task AddMetricAsync(IMetric metric)
    {
        if (metric == null)
        {
            throw new ArgumentNullException(nameof(metric));
        }

        List<IMetric> batchToProcess = null;

        lock (_lock)
        {
            _batch.Add(metric);

            if (_batch.Count >= _batchSize)
            {
                // Copy and clear under the lock; the save itself runs outside it
                // because awaiting inside a lock is not allowed.
                batchToProcess = new List<IMetric>(_batch);
                _batch.Clear();
            }
        }

        if (batchToProcess != null)
        {
            await ProcessBatchAsync(batchToProcess);
        }
    }

    /// <summary>
    /// Saves any buffered metrics immediately, even if the batch is not full.
    /// Intended for shutdown or periodic flushing, so a partially filled batch is not lost.
    /// </summary>
    public async Task FlushAsync()
    {
        List<IMetric> pending = null;

        lock (_lock)
        {
            if (_batch.Count > 0)
            {
                pending = new List<IMetric>(_batch);
                _batch.Clear();
            }
        }

        if (pending != null)
        {
            await ProcessBatchAsync(pending);
        }
    }

    /// <summary>
    /// Saves every metric in <paramref name="batch"/> concurrently.
    /// Failures are recorded but not rethrown, so metrics persistence never breaks the main flow.
    /// </summary>
    private async Task ProcessBatchAsync(List<IMetric> batch)
    {
        try
        {
            // Save all metrics in the batch
            var saveTasks = batch.Select(metric => _repository.SaveMetricsAsync(metric));
            await Task.WhenAll(saveTasks);
        }
        catch (Exception ex)
        {
            // Deliberately best-effort: do not throw, but leave a trace of the failure
            // instead of silently discarding it.
            // In a real implementation, you might want to queue failed metrics for retry.
            System.Diagnostics.Trace.TraceError(
                "Failed to save a batch of {0} metrics: {1}", batch.Count, ex);
        }
    }
}
/// <summary>
/// Exports metrics in formats suitable for monitoring systems.
/// </summary>
public class MetricsExporter
{
    // Reused across calls; System.Text.Json caches serialization metadata per options instance.
    private static readonly JsonSerializerOptions IndentedJsonOptions =
        new JsonSerializerOptions { WriteIndented = true };

    private readonly RoutingMetricsCollector _metricsCollector;

    /// <summary>
    /// Creates an exporter that reads from <paramref name="metricsCollector"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="metricsCollector"/> is null.</exception>
    public MetricsExporter(RoutingMetricsCollector metricsCollector)
    {
        _metricsCollector = metricsCollector ?? throw new ArgumentNullException(nameof(metricsCollector));
    }

    /// <summary>
    /// Export metrics in Prometheus text exposition format.
    /// </summary>
    /// <returns>
    /// The overall routing metrics followed by one metric family per venue metric,
    /// with each venue identified by a <c>venue</c> label.
    /// </returns>
    public string ExportToPrometheus()
    {
        var metrics = _metricsCollector.GetCurrentMetrics();
        var sb = new StringBuilder();

        // Numbers are formatted with the invariant culture: Prometheus requires '.' as
        // the decimal separator, while the current culture may use ','.

        // Overall routing metrics
        AppendHeader(sb, "routing_total_orders", "counter", "Total number of routed orders");
        AppendSample(sb, FormattableString.Invariant($"routing_total_orders {metrics.TotalRoutedOrders}"));

        AppendHeader(sb, "routing_success_rate", "gauge", "Ratio of successful routed orders");
        AppendSample(sb, FormattableString.Invariant($"routing_success_rate {metrics.SuccessRate:F4}"));

        AppendHeader(sb, "routing_average_time_ms", "gauge", "Average routing time in milliseconds");
        AppendSample(sb, FormattableString.Invariant($"routing_average_time_ms {metrics.AverageRoutingTimeMs:F2}"));

        // Venue-specific metrics.
        // The format allows one HELP and one TYPE line per metric name and expects all
        // samples of a metric to be grouped together, so each family is written in its
        // own pass instead of interleaving the three families per venue.
        var venues = metrics.VenuePerformance.Select(kvp => kvp.Value).ToList();
        if (venues.Count > 0)
        {
            AppendHeader(sb, "venue_orders_total", "counter", "Total orders per venue");
            foreach (var venue in venues)
            {
                AppendSample(sb, FormattableString.Invariant(
                    $"venue_orders_total{{venue=\"{EscapeLabelValue(venue.VenueName)}\"}} {venue.TotalOrders}"));
            }

            AppendHeader(sb, "venue_fill_rate", "gauge", "Fill rate per venue");
            foreach (var venue in venues)
            {
                AppendSample(sb, FormattableString.Invariant(
                    $"venue_fill_rate{{venue=\"{EscapeLabelValue(venue.VenueName)}\"}} {venue.FillRate:F4}"));
            }

            AppendHeader(sb, "venue_average_latency_ms", "gauge", "Average latency per venue in milliseconds");
            foreach (var venue in venues)
            {
                AppendSample(sb, FormattableString.Invariant(
                    $"venue_average_latency_ms{{venue=\"{EscapeLabelValue(venue.VenueName)}\"}} {venue.AverageLatencyMs:F2}"));
            }
        }

        return sb.ToString();
    }

    /// <summary>
    /// Export metrics in JSON format
    /// </summary>
    /// <returns>The current metrics serialized as indented JSON.</returns>
    public string ExportToJson()
    {
        var metrics = _metricsCollector.GetCurrentMetrics();
        return JsonSerializer.Serialize(metrics, IndentedJsonOptions);
    }

    /// <summary>
    /// Writes the HELP and TYPE lines that introduce a metric family.
    /// </summary>
    private static void AppendHeader(StringBuilder sb, string name, string type, string help)
    {
        sb.Append("# HELP ").Append(name).Append(' ').Append(help).Append('\n');
        sb.Append("# TYPE ").Append(name).Append(' ').Append(type).Append('\n');
    }

    /// <summary>
    /// Writes one sample line terminated by a line feed. The exposition format is
    /// line-feed delimited, so the platform-specific newline is not used.
    /// </summary>
    private static void AppendSample(StringBuilder sb, string line)
    {
        sb.Append(line).Append('\n');
    }

    /// <summary>
    /// Escapes a label value: backslash, double quote and line feed must be
    /// backslash-escaped inside a quoted label value.
    /// </summary>
    private static string EscapeLabelValue(string value)
    {
        if (string.IsNullOrEmpty(value))
        {
            return string.Empty;
        }

        // Backslashes first, so the escapes added for quotes and line feeds are not doubled.
        return value
            .Replace("\\", "\\\\")
            .Replace("\"", "\\\"")
            .Replace("\n", "\\n");
    }
}
Future Enhancements 1. **Real-time Metrics Streaming**: Stream metrics to monitoring systems in real time 2. **Advanced Analytics**: Use machine learning to predict routing performance 3. **Custom Metrics**: Allow users to define custom metrics and alerts 4. **Metrics Retention**: Implement configurable metrics retention policies 5. **Metrics Compression**: Compress historical metrics to save storage space 6. **Metrics Visualization**: Provide built-in visualization of metrics trends 7. **Metrics Correlation**: Correlate metrics with market conditions and events 8. **Metrics Forecasting**: Predict future performance based on historical metrics