8.5 错误处理策略 🚨
"优雅的错误处理就像是人生的智慧 - 不是避免所有错误,而是在错误发生时知道如何从容应对。"
想象一下,你正在和朋友聊天时突然忘记了要说什么。是尴尬地僵在那里,还是自然地转移话题?MCP工具的错误处理也是同样的道理 - 关键不在于避免所有错误,而在于当错误发生时如何优雅地处理。
为什么错误处理如此重要? 🤔
错误处理的三重价值
- 用户体验 😊 - 友好的错误信息让用户知道发生了什么
- 系统稳定 🛡️ - 避免程序崩溃,保持服务正常运行
- 问题排查 🔍 - 详细的错误日志帮助快速定位问题
python
# ❌ Poor error handling (deliberate counter-example)
def bad_divide(a, b):
    """Return ``a / b`` without checking the divisor.

    This is the "what not to do" half of the comparison: nothing guards
    against ``b == 0``, so the built-in ``ZeroDivisionError`` escapes to
    the caller with no friendly explanation.
    """
    quotient = a / b  # b == 0 crashes right here and leaves the user baffled
    return quotient
# ✅ Graceful error handling
def good_divide(a, b):
    """Return ``a / b``, rejecting a zero divisor with a readable message.

    Raises:
        ValueError: if ``b`` equals zero.
    """
    if b != 0:
        return a / b
    # Fail with an explicit, human-readable error instead of a bare crash.
    raise ValueError("除数不能为零,这会导致宇宙爆炸!💥")
优雅的错误处理框架 🎭
构建分层的错误处理体系
就像是构建一个"错误处理的金字塔",每一层都有其特定的职责。
typescript
// 🌟 Layered error-handling framework

/**
 * Severity attached to an error. Every member is a lowercase string value,
 * listed from least to most severe.
 */
enum ErrorLevel {
  INFO = 'info',
  WARNING = 'warning',
  ERROR = 'error',
  CRITICAL = 'critical',
}
/**
 * Coarse classification of what went wrong. Every member is a lowercase
 * snake_case string value.
 */
enum ErrorCategory {
  VALIDATION = 'validation',
  AUTHENTICATION = 'authentication',
  AUTHORIZATION = 'authorization',
  RESOURCE_NOT_FOUND = 'resource_not_found',
  EXTERNAL_SERVICE = 'external_service',
  RATE_LIMIT = 'rate_limit',
  INTERNAL = 'internal',
  NETWORK = 'network',
  TIMEOUT = 'timeout',
}
/**
 * Structured error used throughout the MCP tool layer.
 *
 * Each instance carries two messages: `userMessage` (also used as the
 * standard `Error.message`) and `technicalMessage` for logs. It also carries
 * a category, a severity level, a retry hint, free-form context, and
 * optionally the underlying `cause`.
 */
class McpError extends Error {
  public readonly category: ErrorCategory;
  public readonly level: ErrorLevel;
  public readonly userMessage: string;
  public readonly technicalMessage: string;
  public readonly errorCode: string;
  public readonly timestamp: Date;
  public readonly retryable: boolean;
  public readonly context: Record<string, any>;
  /**
   * Underlying error, when one was supplied. Declared explicitly because
   * `Error.cause` is only typed from the ES2022 lib onwards; `declare` keeps
   * this type-only so no extra own property is emitted.
   */
  declare readonly cause?: Error;

  /**
   * @param category - Classification of the failure.
   * @param level - Severity of the failure.
   * @param userMessage - Text safe to show to the end user; becomes `message`.
   * @param technicalMessage - Detail intended for logs and alerts.
   * @param options - Optional explicit `errorCode` (one is generated when
   *   absent or empty), `retryable` flag (defaults to `false`), `context`
   *   (defaults to `{}`), and the wrapped `cause`.
   */
  constructor(
    category: ErrorCategory,
    level: ErrorLevel,
    userMessage: string,
    technicalMessage: string,
    options: {
      errorCode?: string;
      retryable?: boolean;
      context?: Record<string, any>;
      cause?: Error;
    } = {}
  ) {
    super(userMessage);
    // Restore the prototype chain so `instanceof McpError` still holds when
    // the class is compiled down to ES5, where extending Error breaks it.
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = 'McpError';
    this.category = category;
    this.level = level;
    this.userMessage = userMessage;
    this.technicalMessage = technicalMessage;
    // `||` on purpose: an empty code is as useless as a missing one.
    // generateErrorCode() reads this.category, which is already assigned.
    this.errorCode = options.errorCode || this.generateErrorCode();
    this.timestamp = new Date();
    this.retryable = options.retryable ?? false;
    this.context = options.context || {};
    if (options.cause) {
      this.cause = options.cause;
    }
  }

  /** Builds `<CATEGORY>_<base36 timestamp>_<base36 random>` in upper case. */
  private generateErrorCode(): string {
    const timestamp = Date.now().toString(36).toUpperCase();
    const random = Math.random().toString(36).substring(2, 8).toUpperCase();
    return `${this.category.toUpperCase()}_${timestamp}_${random}`;
  }

  /**
   * Plain-object form used for logging. Includes the stack trace and, when
   * present, a summary of the wrapped cause so the root failure is not lost.
   */
  toJSON() {
    const cause = this.cause;
    return {
      name: this.name,
      category: this.category,
      level: this.level,
      userMessage: this.userMessage,
      technicalMessage: this.technicalMessage,
      errorCode: this.errorCode,
      timestamp: this.timestamp.toISOString(),
      retryable: this.retryable,
      context: this.context,
      stack: this.stack,
      cause: cause
        ? { name: cause.name, message: cause.message, stack: cause.stack }
        : undefined,
    };
  }

  // Static factory methods that make the common error shapes easy to create.

  /** Non-retryable validation failure; `field` and `value` go into `context`. */
  static validation(message: string, field?: string, value?: any): McpError {
    return new McpError(
      ErrorCategory.VALIDATION,
      ErrorLevel.ERROR,
      `参数验证失败:${message}`,
      `Validation failed: ${message}`,
      {
        retryable: false,
        context: { field, value }
      }
    );
  }

  /** Non-retryable "resource not found" error, optionally naming the identifier. */
  static notFound(resource: string, identifier?: string): McpError {
    return new McpError(
      ErrorCategory.RESOURCE_NOT_FOUND,
      ErrorLevel.ERROR,
      `未找到${resource}${identifier ? `:${identifier}` : ''}`,
      `Resource not found: ${resource}${identifier ? ` (${identifier})` : ''}`,
      {
        retryable: false,
        context: { resource, identifier }
      }
    );
  }

  /**
   * Failure of an external dependency. Wraps `originalError` as `cause`;
   * retryable unless the caller says otherwise.
   */
  static externalService(
    serviceName: string,
    originalError: Error,
    retryable: boolean = true
  ): McpError {
    return new McpError(
      ErrorCategory.EXTERNAL_SERVICE,
      ErrorLevel.WARNING,
      `${serviceName}服务暂时不可用,${retryable ? '请稍后重试' : '请联系管理员'}`,
      `External service error: ${serviceName} - ${originalError.message}`,
      {
        retryable,
        context: { serviceName, originalError: originalError.message },
        cause: originalError
      }
    );
  }

  /** Retryable rate-limit error; the wait time is stored in `context.retryAfterSeconds`. */
  static rateLimit(retryAfterSeconds: number): McpError {
    return new McpError(
      ErrorCategory.RATE_LIMIT,
      ErrorLevel.WARNING,
      `请求过于频繁,请等待${retryAfterSeconds}秒后重试`,
      `Rate limit exceeded, retry after ${retryAfterSeconds} seconds`,
      {
        retryable: true,
        context: { retryAfterSeconds }
      }
    );
  }

  /** Retryable timeout error for the named operation. */
  static timeout(operation: string, timeoutMs: number): McpError {
    return new McpError(
      ErrorCategory.TIMEOUT,
      ErrorLevel.WARNING,
      `操作超时:${operation}(${timeoutMs}ms)`,
      `Operation timeout: ${operation} after ${timeoutMs}ms`,
      {
        retryable: true,
        context: { operation, timeoutMs }
      }
    );
  }

  /**
   * Critical, non-retryable internal error. The user sees a fixed generic
   * message; `message` only reaches the technical message and context.
   */
  static internal(message: string, originalError?: Error): McpError {
    return new McpError(
      ErrorCategory.INTERNAL,
      ErrorLevel.CRITICAL,
      '系统内部错误,请联系技术支持',
      `Internal error: ${message}`,
      {
        retryable: false,
        context: { originalMessage: message },
        cause: originalError
      }
    );
  }
}
// Error-handling middleware
/**
 * Turns any thrown value into an `McpError`, records it (log, metric, and an
 * alert for critical errors), and produces the response sent to the caller.
 *
 * Logging, metrics and alerting are best-effort: a failure in any of them is
 * reported on the console and never prevents the error response from being
 * returned.
 */
class ErrorHandler {
  /** Timeout reported when a generic `TimeoutError` carries no duration. */
  private static readonly DEFAULT_TIMEOUT_MS = 30000;
  /** Wait time used when a rate-limit error carries no usable hint. */
  private static readonly DEFAULT_RATE_LIMIT_RETRY = 60;

  private logger: Logger;
  private metrics: MetricsCollector;

  constructor(logger: Logger, metrics: MetricsCollector) {
    this.logger = logger;
    this.metrics = metrics;
  }

  /**
   * Handles one failure end to end.
   *
   * @param error - The thrown value (non-`Error` values are tolerated at runtime).
   * @param context - Request metadata merged into the log entry and alert.
   * @returns The user-facing error response.
   */
  async handleError(error: Error, context: ExecutionContext): Promise<ErrorResponse> {
    const mcpError = this.normalizeError(error);

    // Record the error in the log.
    await this.runSafely('logError', () => this.logError(mcpError, context));
    // Update the error metrics.
    await this.runSafely('updateMetrics', () => this.updateMetrics(mcpError));
    // Raise an alert only for critical errors.
    if (mcpError.level === ErrorLevel.CRITICAL) {
      await this.runSafely('sendAlert', () => this.sendAlert(mcpError, context));
    }

    // Return the user-friendly error response.
    return this.createErrorResponse(mcpError);
  }

  /**
   * Runs one side-effect step and swallows (but reports) its failure, so the
   * error handler cannot itself become a source of unhandled errors.
   */
  private async runSafely(step: string, action: () => void | Promise<void>): Promise<void> {
    try {
      await action();
    } catch (stepError: unknown) {
      console.error(`ErrorHandler.${step} failed`, stepError);
    }
  }

  /**
   * Maps an arbitrary thrown value onto an `McpError` using its `name` and
   * `message`; anything unrecognised becomes an internal error.
   */
  private normalizeError(error: unknown): McpError {
    // Already an MCP error: pass it through untouched.
    if (error instanceof McpError) {
      return error;
    }

    // Read name/message defensively: `throw "text"` or `throw null` must not
    // crash the handler.
    const candidate = (typeof error === 'object' && error !== null ? error : {}) as {
      name?: unknown;
      message?: unknown;
    };
    const name = typeof candidate.name === 'string' ? candidate.name : '';
    const message = typeof candidate.message === 'string' ? candidate.message : String(error);
    const original = error instanceof Error ? error : new Error(message);

    // Convert by error type.
    if (name === 'ValidationError') {
      return McpError.validation(message);
    }
    if (name === 'TimeoutError') {
      return McpError.timeout('操作', ErrorHandler.DEFAULT_TIMEOUT_MS);
    }
    if (message.includes('ECONNREFUSED') || message.includes('ETIMEDOUT')) {
      return McpError.externalService('外部服务', original);
    }
    if (message.includes('Rate limit') || message.includes('Too Many Requests')) {
      return McpError.rateLimit(ErrorHandler.DEFAULT_RATE_LIMIT_RETRY);
    }

    // Everything else is treated as an internal error.
    return McpError.internal(message, original);
  }

  /**
   * Writes the error to the logger method matching its level. Request
   * metadata is merged into `context`; on a key clash the request value wins.
   */
  private async logError(error: McpError, context: ExecutionContext): Promise<void> {
    const logData = {
      ...error.toJSON(),
      context: {
        ...error.context,
        userId: context.userId,
        toolName: context.toolName,
        requestId: context.requestId,
        userAgent: context.userAgent,
        ipAddress: context.ipAddress
      }
    };
    switch (error.level) {
      case ErrorLevel.INFO:
        this.logger.info('MCP Error', logData);
        break;
      case ErrorLevel.WARNING:
        this.logger.warn('MCP Error', logData);
        break;
      case ErrorLevel.ERROR:
        this.logger.error('MCP Error', logData);
        break;
      case ErrorLevel.CRITICAL:
        this.logger.critical('MCP Error', logData);
        break;
    }
  }

  /** Increments the error counter, tagged by category, level and retryability. */
  private updateMetrics(error: McpError): void {
    this.metrics.increment('mcp.errors.total', {
      category: error.category,
      level: error.level,
      retryable: String(error.retryable)
    });
  }

  /**
   * Builds the alert payload for a critical error. Delivery is a placeholder:
   * the payload is only printed to the console.
   */
  private async sendAlert(error: McpError, context: ExecutionContext): Promise<void> {
    // Intended channels: DingTalk, e-mail or SMS.
    const alertMessage = {
      title: `🚨 MCP严重错误告警`,
      content: `
**错误信息**: ${error.userMessage}
**错误代码**: ${error.errorCode}
**工具名称**: ${context.toolName}
**用户ID**: ${context.userId}
**时间**: ${error.timestamp.toLocaleString('zh-CN')}
**技术详情**: ${error.technicalMessage}
`,
      level: 'critical'
    };
    // The concrete alert-delivery logic goes here.
    console.log('🚨 发送告警:', alertMessage);
  }

  /**
   * Builds the response returned to the caller. Only user-safe fields are
   * exposed; `retryAfter` is set for retryable errors only.
   */
  private createErrorResponse(error: McpError): ErrorResponse {
    return {
      success: false,
      error: {
        code: error.errorCode,
        message: error.userMessage,
        category: error.category,
        retryable: error.retryable,
        timestamp: error.timestamp.toISOString()
      },
      retryAfter: error.retryable ? this.calculateRetryAfter(error) : undefined
    };
  }

  /**
   * Suggests how long to wait before retrying, by category. The rate-limit
   * branch reuses `context.retryAfterSeconds`, so the unit is presumably
   * seconds throughout.
   */
  private calculateRetryAfter(error: McpError): number {
    switch (error.category) {
      case ErrorCategory.RATE_LIMIT: {
        // Only trust a positive, finite number; anything else falls back.
        const hinted: unknown = error.context.retryAfterSeconds;
        return typeof hinted === 'number' && Number.isFinite(hinted) && hinted > 0
          ? hinted
          : ErrorHandler.DEFAULT_RATE_LIMIT_RETRY;
      }
      case ErrorCategory.EXTERNAL_SERVICE:
        return 30;
      case ErrorCategory.TIMEOUT:
        return 10;
      default:
        return 60;
    }
  }
}
具体场景的错误处理实践 🎪
场景1:数据库查询工具
python
# 🌟 数据库查询工具的完整错误处理
import asyncio
import asyncpg
from typing import Dict, List, Any, Optional
import logging
from datetime import datetime, timedelta
class DatabaseQueryTool:
    """Read-only SQL query tool with layered error handling.

    A call to :meth:`execute` passes through three layers: parameter
    validation, a keyword/pattern screen, and execution with retries.
    Expected failures surface as ``McpError``; anything unexpected is logged
    and wrapped in ``McpError.internal``.

    NOTE(review): the keyword/pattern screen is defence in depth only. It
    cannot replace running queries under a database role that has read-only
    privileges.
    """

    # Annotations that mention asyncpg are quoted so the class can be defined
    # without resolving asyncpg eagerly.
    def __init__(self, connection_pool: "asyncpg.Pool", logger: logging.Logger):
        self.pool = connection_pool
        self.logger = logger
        self.max_retries = 3
        self.retry_delay = 1.0  # initial retry delay (seconds)

    async def execute(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """Validate, screen and run a query.

        Args:
            parameters: ``query`` (required), ``limit`` (default 100) and
                ``timeout`` in seconds (default 30).

        Returns:
            ``{'success': True, 'data': ..., 'execution_id': ...,
            'execution_time': ...}`` where ``data`` is the dict produced by
            ``_run_query``.

        Raises:
            McpError: for validation, security, database and timeout
                failures, and (as an internal error) for anything unexpected.
        """
        execution_id = self._generate_execution_id()
        start_time = datetime.now()
        try:
            self.logger.info(f"开始执行数据库查询 [{execution_id}]", extra={
                'execution_id': execution_id,
                'parameters': self._sanitize_params_for_log(parameters)
            })
            # Layer 1: parameter validation
            validated_params = await self._validate_parameters(parameters)
            # Layer 2: security screen
            await self._security_check(validated_params)
            # Layer 3: execution (with retries)
            result = await self._execute_with_retry(validated_params, execution_id)
            # Success log
            execution_time = (datetime.now() - start_time).total_seconds()
            self.logger.info(f"数据库查询执行成功 [{execution_id}]", extra={
                'execution_id': execution_id,
                'execution_time': execution_time,
                'result_count': len(result.get('rows', []))
            })
            return {
                'success': True,
                'data': result,
                'execution_id': execution_id,
                'execution_time': execution_time
            }
        except McpError:
            # MCP errors are already structured; re-raise unchanged.
            raise
        except Exception as e:
            # Unexpected error: log with traceback, then wrap.
            execution_time = (datetime.now() - start_time).total_seconds()
            self.logger.error(f"数据库查询意外错误 [{execution_id}]", extra={
                'execution_id': execution_id,
                'execution_time': execution_time,
                'error': str(e),
                'error_type': type(e).__name__
            }, exc_info=True)
            # `from e` keeps the original traceback chained to the wrapper.
            raise McpError.internal(f"查询执行失败 [{execution_id}]", e) from e

    async def _validate_parameters(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Parameter validation - the first line of defence.

        Collects every problem and raises a single validation error listing
        them all. Returns the normalised ``query``, ``limit`` and ``timeout``.

        Raises:
            McpError: validation error when any check fails.
        """
        raw_query = params.get('query')
        # A non-string query cannot be inspected by the checks below; reject
        # it now instead of crashing on .strip().
        if raw_query is not None and not isinstance(raw_query, str):
            raise McpError.validation('query参数必须是字符串')

        errors = []
        # Required parameter
        if not raw_query:
            errors.append('query参数是必需的')
        query = (raw_query or '').strip()
        # Query length
        if len(query) > 10000:
            errors.append('查询语句过长(超过10000字符)')
        # Query type (SELECT only)
        if not query.upper().startswith('SELECT'):
            errors.append('只允许执行SELECT查询')
        # limit: bool is a subclass of int, so exclude it explicitly.
        limit = params.get('limit', 100)
        if isinstance(limit, bool) or not isinstance(limit, int) or limit < 1 or limit > 1000:
            errors.append('limit必须是1-1000之间的整数')
        # timeout is later passed to asyncio.wait_for and multiplied by 1000,
        # so it must be a positive number.
        timeout = params.get('timeout', 30)
        if isinstance(timeout, bool) or not isinstance(timeout, (int, float)) or timeout <= 0:
            errors.append('timeout必须是大于0的数字')
        # Report everything at once.
        if errors:
            raise McpError.validation('; '.join(errors))
        return {
            'query': query,
            'limit': limit,
            'timeout': timeout
        }

    async def _security_check(self, params: Dict[str, Any]) -> None:
        """Security screen - the second line of defence.

        Rejects queries that contain a write/DDL keyword as a whole word, or
        that match one of the injection patterns.

        Raises:
            McpError: authorization error naming the keyword or pattern.
        """
        import re

        query = params['query'].upper()
        # Dangerous operations. Matched on word boundaries so identifiers
        # such as CREATED_AT or UPDATED_AT are not mistaken for CREATE/UPDATE.
        dangerous_keywords = [
            'DROP', 'DELETE', 'UPDATE', 'INSERT', 'ALTER', 'CREATE',
            'TRUNCATE', 'GRANT', 'REVOKE', 'EXEC', 'EXECUTE'
        ]
        for keyword in dangerous_keywords:
            if re.search(rf'\b{keyword}\b', query):
                raise McpError(
                    ErrorCategory.AUTHORIZATION,
                    ErrorLevel.ERROR,
                    f'查询包含禁止的操作:{keyword}',
                    f'Dangerous keyword detected: {keyword}',
                    {'retryable': False, 'context': {'keyword': keyword}}
                )
        # SQL-injection patterns
        injection_patterns = [
            r"(\s|^)(OR|AND)\s+\d+\s*=\s*\d+",
            r"(\s|^)(OR|AND)\s+['\"].*['\"]",
            r"UNION\s+(ALL\s+)?SELECT",
            r"--\s*",
            r"/\*.*\*/"
        ]
        for pattern in injection_patterns:
            # DOTALL so a block comment spanning several lines is caught too.
            if re.search(pattern, query, re.IGNORECASE | re.DOTALL):
                raise McpError(
                    ErrorCategory.AUTHORIZATION,
                    ErrorLevel.ERROR,
                    '查询包含潜在的SQL注入攻击模式',
                    f'SQL injection pattern detected: {pattern}',
                    {'retryable': False, 'context': {'pattern': pattern}}
                )

    async def _execute_with_retry(
        self,
        params: Dict[str, Any],
        execution_id: str
    ) -> Dict[str, Any]:
        """Query execution with retries - the third line of defence.

        Dropped connections and timeouts are retried with exponential
        backoff; other database errors are converted and raised at once.

        Raises:
            McpError: validation (SQLSTATE 42xxx), authentication (28xxx),
                external-service, or timeout error.
        """
        last_error: Optional[BaseException] = None
        # Always make at least one attempt, even if max_retries was set to 0.
        attempts = max(1, self.max_retries)
        for attempt in range(attempts):
            try:
                return await self._execute_single_query(params, execution_id, attempt + 1)
            except asyncpg.exceptions.ConnectionDoesNotExistError as e:
                last_error = e
                self.logger.warning(f"数据库连接已断开,尝试重试 [{execution_id}] 第{attempt + 1}次", extra={
                    'execution_id': execution_id,
                    'attempt': attempt + 1,
                    'error': str(e)
                })
                if attempt < attempts - 1:
                    await asyncio.sleep(self.retry_delay * (2 ** attempt))  # exponential backoff
            except asyncpg.exceptions.PostgresError as e:
                # Database-level errors normally do not benefit from a retry.
                sqlstate = getattr(e, 'sqlstate', None) or ''
                if sqlstate.startswith('42'):  # syntax error / access rule violation
                    raise McpError.validation(f'SQL语法错误:{e.message}')
                elif sqlstate.startswith('28'):  # invalid authorization
                    raise McpError(
                        ErrorCategory.AUTHENTICATION,
                        ErrorLevel.ERROR,
                        '数据库认证失败',
                        f'Database authentication failed: {e.message}',
                        {'retryable': False}
                    )
                else:
                    # Any other database error
                    raise McpError.externalService('数据库', e, retryable=False)
            except asyncio.TimeoutError as e:
                last_error = e
                self.logger.warning(f"查询超时,尝试重试 [{execution_id}] 第{attempt + 1}次", extra={
                    'execution_id': execution_id,
                    'attempt': attempt + 1,
                    'timeout': params['timeout']
                })
                if attempt < attempts - 1:
                    await asyncio.sleep(self.retry_delay * (2 ** attempt))
        # Every attempt failed.
        if isinstance(last_error, asyncio.TimeoutError):
            raise McpError.timeout(f'数据库查询 [{execution_id}]', params['timeout'] * 1000)
        else:
            raise McpError.externalService('数据库', last_error, retryable=False)

    async def _execute_single_query(
        self,
        params: Dict[str, Any],
        execution_id: str,
        attempt: int
    ) -> Dict[str, Any]:
        """Run one attempt on a pooled connection, bounded by the timeout.

        Returns the dict produced by ``_run_query``.

        Raises:
            asyncio.TimeoutError: when the query exceeds ``params['timeout']``.
        """
        async with self.pool.acquire() as connection:
            try:
                # Bound the query by the timeout and hand the result back to
                # the caller (it used to be dropped, so callers got None).
                return await asyncio.wait_for(
                    self._run_query(connection, params, execution_id, attempt),
                    timeout=params['timeout']
                )
            except asyncio.TimeoutError:
                # Best-effort cancellation; a failure here must not mask the timeout.
                # NOTE(review): confirm the connection type exposes cancel(). If it
                # does not, this call fails with AttributeError and is ignored.
                try:
                    await connection.cancel()
                except Exception:
                    # Exception only: task cancellation and KeyboardInterrupt
                    # must keep propagating.
                    self.logger.debug(
                        f"取消查询失败(已忽略) [{execution_id}]", exc_info=True
                    )
                raise

    async def _run_query(
        self,
        connection: "asyncpg.Connection",
        params: Dict[str, Any],
        execution_id: str,
        attempt: int
    ) -> Dict[str, Any]:
        """Run the actual query and convert the rows to plain dicts.

        Returns ``rows``, ``count``, the final ``query`` text and the
        ``execution_id``.
        """
        import re

        query = params['query']
        limit = params['limit']
        # Append a LIMIT clause when the query has none. The whole-word match
        # keeps identifiers like rate_limit from counting as a LIMIT clause.
        # NOTE(review): a LIMIT written by the caller is not capped here.
        if not re.search(r'\bLIMIT\b', query, re.IGNORECASE):
            query = f"{query.rstrip(';')} LIMIT {limit}"
        self.logger.debug(f"执行查询 [{execution_id}] 第{attempt}次", extra={
            'execution_id': execution_id,
            'attempt': attempt,
            'query': query[:200] + '...' if len(query) > 200 else query
        })
        # Run the query
        rows = await connection.fetch(query)
        # Convert the result rows
        result_data = [dict(row) for row in rows]
        return {
            'rows': result_data,
            'count': len(result_data),
            'query': query,
            'execution_id': execution_id
        }

    def _generate_execution_id(self) -> str:
        """Generate an execution id: ``db_query_`` plus 8 hex characters."""
        import uuid
        return f"db_query_{uuid.uuid4().hex[:8]}"

    def _sanitize_params_for_log(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Return a shallow copy of ``params`` that is safe to log.

        Values whose key contains ``password``, ``token``, ``key`` or
        ``secret`` (case-insensitive) are masked, and a string ``query``
        longer than 500 characters is truncated. This runs before validation,
        so it tolerates a missing or non-string ``query``.
        """
        sanitized = params.copy()
        # Hide sensitive values, including variants such as 'api_key' or 'Password'.
        sensitive_markers = ('password', 'token', 'key', 'secret')
        for key in list(sanitized):
            if isinstance(key, str) and any(marker in key.lower() for marker in sensitive_markers):
                sanitized[key] = '***HIDDEN***'
        # Truncate long strings
        query = sanitized.get('query')
        if isinstance(query, str) and len(query) > 500:
            sanitized['query'] = query[:500] + '...'
        return sanitized
# Usage example
async def demo_database_tool():
    """Run one sample query through ``DatabaseQueryTool`` and print the outcome.

    The connection pool is always closed on the way out, whether the query
    succeeds or fails.
    """
    # Create the connection pool (placeholder DSN; real code should load
    # credentials from configuration rather than hard-coding them).
    pool = await asyncpg.create_pool(
        "postgresql://user:password@localhost/database"
    )
    try:
        # Create the logger
        logger = logging.getLogger('database_tool')
        logger.setLevel(logging.INFO)
        # Create the tool instance
        db_tool = DatabaseQueryTool(pool, logger)
        try:
            # A normal query
            result = await db_tool.execute({
                'query': 'SELECT * FROM users WHERE age > 18',
                'limit': 50
            })
            print("查询成功:", result)
        except McpError as e:
            print("MCP错误:", e.userMessage)
            print("错误代码:", e.errorCode)
            print("是否可重试:", e.retryable)
        except Exception as e:
            print("未处理的错误:", str(e))
    finally:
        # Release every pooled connection; otherwise they stay open until
        # the process exits.
        await pool.close()
重试机制设计 🔄
智能重试策略
重试就像是"再试一次"的勇气,但要有智慧地重试。
csharp
// 🌟 Smart retry mechanism
/// <summary>
/// Runs an asynchronous operation and retries it according to a policy
/// selected from the type of the exception it throws.
/// </summary>
/// <remarks>
/// The attempt limit is taken from the policy of the exception actually
/// thrown. When a retryable failure uses up its attempts, a
/// <see cref="RetryExhaustedException"/> wrapping the last failure is thrown;
/// non-retryable failures are rethrown unchanged.
/// </remarks>
public class SmartRetryHandler
{
    /// <summary>Policy used when no configured policy matches the exception type.</summary>
    private static readonly RetryPolicy DefaultPolicy = new RetryPolicy
    {
        MaxAttempts = 2,
        BaseDelayMs = 1000,
        BackoffStrategy = BackoffStrategy.Fixed
    };

    /// <summary>
    /// Status codes looked for in the exception message when the exception
    /// carries no <c>StatusCode</c>. Checked in this order.
    /// </summary>
    private static readonly HttpStatusCode[] MessageFallbackStatusCodes =
    {
        HttpStatusCode.InternalServerError,
        HttpStatusCode.BadGateway,
        HttpStatusCode.ServiceUnavailable,
        HttpStatusCode.GatewayTimeout,
        (HttpStatusCode)429, // Too Many Requests
        HttpStatusCode.RequestTimeout
    };

    private readonly ILogger<SmartRetryHandler> _logger;
    private readonly Dictionary<Type, RetryPolicy> _policies;

    public SmartRetryHandler(ILogger<SmartRetryHandler> logger)
    {
        _logger = logger;
        _policies = InitializeRetryPolicies();
    }

    /// <summary>Builds the per-exception-type policy table.</summary>
    private Dictionary<Type, RetryPolicy> InitializeRetryPolicies()
    {
        return new Dictionary<Type, RetryPolicy>
        {
            // Network errors: aggressive retry
            [typeof(HttpRequestException)] = new RetryPolicy
            {
                MaxAttempts = 5,
                BaseDelayMs = 1000,
                BackoffStrategy = BackoffStrategy.ExponentialWithJitter,
                MaxDelayMs = 30000,
                RetryableStatusCodes = new[] { 408, 429, 500, 502, 503, 504 }
            },
            // Timeouts: moderate retry
            [typeof(TimeoutException)] = new RetryPolicy
            {
                MaxAttempts = 3,
                BaseDelayMs = 2000,
                BackoffStrategy = BackoffStrategy.Linear,
                MaxDelayMs = 10000
            },
            // Database connection errors: conservative retry
            // NOTE(review): IsRetryableException lists InvalidOperationException as
            // never retryable, so this policy is currently never applied. Decide
            // which of the two is intended.
            [typeof(InvalidOperationException)] = new RetryPolicy
            {
                MaxAttempts = 2,
                BaseDelayMs = 5000,
                BackoffStrategy = BackoffStrategy.Fixed,
                MaxDelayMs = 5000
            },
            // Validation errors: no retry
            [typeof(ArgumentException)] = new RetryPolicy
            {
                MaxAttempts = 1,
                BaseDelayMs = 0,
                BackoffStrategy = BackoffStrategy.None
            }
        };
    }

    /// <summary>
    /// Executes <paramref name="operation"/>, retrying retryable failures
    /// with the delay dictated by the matching policy.
    /// </summary>
    /// <param name="operation">The operation to run; invoked once per attempt.</param>
    /// <param name="operationName">Name used in log messages and in the final exception.</param>
    /// <param name="cancellationToken">Checked before each attempt and during each delay.</param>
    /// <returns>The result of the first successful attempt.</returns>
    /// <exception cref="RetryExhaustedException">
    /// A retryable failure occurred on the last attempt its policy allows.
    /// </exception>
    /// <exception cref="OperationCanceledException">The caller cancelled.</exception>
    public async Task<T> ExecuteWithRetryAsync<T>(
        Func<Task<T>> operation,
        string operationName,
        CancellationToken cancellationToken = default)
    {
        var stopwatch = Stopwatch.StartNew();
        // No fixed upper bound: every catch branch either delays and loops,
        // or throws, and the limit comes from the thrown exception's policy.
        for (var attempt = 1; ; attempt++)
        {
            cancellationToken.ThrowIfCancellationRequested();
            try
            {
                _logger.LogInformation("执行操作:{OperationName},第{Attempt}次尝试",
                    operationName, attempt);
                var result = await operation();
                if (attempt > 1)
                {
                    _logger.LogInformation("操作成功:{OperationName},第{Attempt}次尝试成功,耗时{ElapsedMs}ms",
                        operationName, attempt, stopwatch.ElapsedMilliseconds);
                }
                return result;
            }
            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
            {
                // The caller asked to stop: never retry or wrap this.
                throw;
            }
            catch (Exception ex) when (ShouldRetry(ex, attempt))
            {
                var delay = CalculateDelay(ex, attempt);
                _logger.LogWarning(ex,
                    "操作失败:{OperationName},第{Attempt}次尝试失败,{DelayMs}ms后重试。错误:{ErrorMessage}",
                    operationName, attempt, delay, ex.Message);
                await Task.Delay(delay, cancellationToken);
            }
            catch (Exception ex) when (IsRetryable(ex) && GetMaxAttempts(ex) > 1)
            {
                // Retryable in principle, but the policy's attempts are used up.
                _logger.LogError(ex,
                    "操作最终失败:{OperationName},{MaxAttempts}次尝试后仍然失败,总耗时{TotalTimeMs}ms",
                    operationName, attempt, stopwatch.ElapsedMilliseconds);
                throw new RetryExhaustedException(
                    $"操作 '{operationName}' 在 {attempt} 次尝试后仍然失败",
                    ex);
            }
            catch (Exception ex)
            {
                // Non-retryable error
                _logger.LogError(ex, "操作失败:{OperationName},不可重试的错误:{ErrorMessage}",
                    operationName, ex.Message);
                throw;
            }
        }
    }

    /// <summary>
    /// True when the exception is retryable and the policy still allows
    /// another attempt after <paramref name="currentAttempt"/>.
    /// </summary>
    private bool ShouldRetry(Exception exception, int currentAttempt)
    {
        // Stop once the policy's attempt budget is spent.
        if (currentAttempt >= GetRetryPolicy(exception).MaxAttempts)
            return false;
        return IsRetryable(exception);
    }

    /// <summary>
    /// True when the exception type may be retried and, for HTTP failures,
    /// the status code is one the policy lists as retryable.
    /// </summary>
    private bool IsRetryable(Exception exception)
    {
        if (!IsRetryableException(exception))
            return false;
        // HTTP failures additionally depend on the status code.
        if (exception is HttpRequestException httpEx)
        {
            return IsRetryableHttpError(httpEx, GetRetryPolicy(exception));
        }
        return true;
    }

    /// <summary>
    /// Finds the policy for an exception: exact type first, then any
    /// configured base type, then the default policy.
    /// </summary>
    private RetryPolicy GetRetryPolicy(Exception exception)
    {
        var exceptionType = exception.GetType();
        // Exact match
        if (_policies.TryGetValue(exceptionType, out var policy))
            return policy;
        // Base-class match
        foreach (var kvp in _policies)
        {
            if (kvp.Key.IsAssignableFrom(exceptionType))
                return kvp.Value;
        }
        // Default policy
        return DefaultPolicy;
    }

    /// <summary>
    /// Delay in milliseconds before the next attempt, capped at the policy's
    /// <c>MaxDelayMs</c>. Strategies without an explicit arm (including
    /// <c>None</c>) use <c>BaseDelayMs</c>.
    /// </summary>
    private int CalculateDelay(Exception exception, int attempt)
    {
        var policy = GetRetryPolicy(exception);
        int delay = policy.BackoffStrategy switch
        {
            BackoffStrategy.Fixed => policy.BaseDelayMs,
            BackoffStrategy.Linear => policy.BaseDelayMs * attempt,
            BackoffStrategy.Exponential => policy.BaseDelayMs * (int)Math.Pow(2, attempt - 1),
            BackoffStrategy.ExponentialWithJitter => CalculateJitteredDelay(policy.BaseDelayMs, attempt),
            _ => policy.BaseDelayMs
        };
        return Math.Min(delay, policy.MaxDelayMs);
    }

    /// <summary>Exponential delay plus a random extra of up to 25% of it.</summary>
    private int CalculateJitteredDelay(int baseDelay, int attempt)
    {
        var exponentialDelay = baseDelay * (int)Math.Pow(2, attempt - 1);
        var jitter = Random.Shared.Next(0, exponentialDelay / 4); // up to 25% jitter
        return exponentialDelay + jitter;
    }

    /// <summary>
    /// True when a status code can be determined and the policy lists it.
    /// An unknown status code is treated as not retryable.
    /// </summary>
    private bool IsRetryableHttpError(HttpRequestException httpEx, RetryPolicy policy)
    {
        var statusCode = ExtractStatusCode(httpEx);
        return statusCode.HasValue &&
            policy.RetryableStatusCodes?.Contains((int)statusCode.Value) == true;
    }

    /// <summary>
    /// Determines the HTTP status behind the exception: the exception's own
    /// <c>StatusCode</c> when set, otherwise a best-effort scan of the message
    /// for a known code, either as a number ("503") or by enum name
    /// ("ServiceUnavailable").
    /// </summary>
    private HttpStatusCode? ExtractStatusCode(HttpRequestException httpEx)
    {
        // Preferred source: the status code attached to the exception.
        if (httpEx.StatusCode.HasValue)
            return httpEx.StatusCode;

        // Fallback for exceptions created without a status code.
        var message = httpEx.Message;
        foreach (var code in MessageFallbackStatusCodes)
        {
            if (message.Contains(((int)code).ToString()) || message.Contains(code.ToString()))
                return code;
        }
        return null;
    }

    /// <summary>
    /// Attempt limit for the exception's policy, or 3 when no exception is given.
    /// </summary>
    private int GetMaxAttempts(Exception? exception = null)
    {
        if (exception == null) return 3; // default
        return GetRetryPolicy(exception).MaxAttempts;
    }

    /// <summary>False for exception types that must never be retried.</summary>
    private bool IsRetryableException(Exception exception)
    {
        // Exception types that are never retried
        var nonRetryableTypes = new[]
        {
            typeof(ArgumentException),
            typeof(ArgumentNullException),
            typeof(InvalidOperationException),
            typeof(NotSupportedException),
            typeof(UnauthorizedAccessException)
        };
        return !nonRetryableTypes.Any(type => type.IsAssignableFrom(exception.GetType()));
    }
}
// Retry policy configuration
/// <summary>
/// Settings that describe how failures of one exception type are retried.
/// </summary>
public class RetryPolicy
{
/// <summary>Attempt limit that SmartRetryHandler compares the current attempt number against. Defaults to 3.</summary>
public int MaxAttempts { get; set; } = 3;
/// <summary>Base delay in milliseconds fed into the backoff calculation. Defaults to 1000.</summary>
public int BaseDelayMs { get; set; } = 1000;
/// <summary>Upper bound in milliseconds applied to the computed delay. Defaults to 30000.</summary>
public int MaxDelayMs { get; set; } = 30000;
/// <summary>How the delay grows from one attempt to the next. Defaults to <see cref="BackoffStrategy.Exponential"/>.</summary>
public BackoffStrategy BackoffStrategy { get; set; } = BackoffStrategy.Exponential;
/// <summary>HTTP status codes considered retryable; when null, no HTTP status is treated as retryable.</summary>
public int[]? RetryableStatusCodes { get; set; }
}
/// <summary>
/// How SmartRetryHandler.CalculateDelay derives the wait before the next attempt.
/// </summary>
public enum BackoffStrategy
{
/// <summary>Has no dedicated branch in CalculateDelay, so it falls through to the default and yields BaseDelayMs.</summary>
None,
/// <summary>Always BaseDelayMs.</summary>
Fixed,
/// <summary>BaseDelayMs multiplied by the attempt number.</summary>
Linear,
/// <summary>BaseDelayMs multiplied by 2^(attempt - 1).</summary>
Exponential,
/// <summary>The exponential delay plus a random extra of up to a quarter of it.</summary>
ExponentialWithJitter
}
/// <summary>
/// Thrown by SmartRetryHandler.ExecuteWithRetryAsync when an operation still
/// fails after its retries; the last failure is carried as the inner exception.
/// </summary>
public class RetryExhaustedException : Exception
{
/// <param name="message">Description of the operation that failed.</param>
/// <param name="innerException">The last exception thrown by the operation.</param>
public RetryExhaustedException(string message, Exception innerException)
: base(message, innerException) { }
}
// Usage example
/// <summary>
/// Sample client that fetches current weather through
/// <see cref="SmartRetryHandler"/> so failed requests can be retried.
/// </summary>
public class WeatherApiClient
{
    private readonly HttpClient _httpClient;
    private readonly SmartRetryHandler _retryHandler;
    private readonly ILogger<WeatherApiClient> _logger;

    public WeatherApiClient(
        HttpClient httpClient,
        SmartRetryHandler retryHandler,
        ILogger<WeatherApiClient> logger)
    {
        _httpClient = httpClient;
        _retryHandler = retryHandler;
        _logger = logger;
    }

    /// <summary>Gets the current weather for <paramref name="city"/>.</summary>
    /// <param name="city">City name; URL-encoded before being placed in the query string.</param>
    /// <returns>The deserialized response body.</returns>
    /// <exception cref="HttpRequestException">
    /// The API answered with a non-success status. The exception carries the
    /// status code, and the message contains it in numeric form.
    /// </exception>
    public async Task<WeatherData> GetWeatherAsync(string city)
    {
        return await _retryHandler.ExecuteWithRetryAsync(
            operation: async () =>
            {
                // Encode the city so spaces, '&', '#' or non-ASCII names cannot
                // break or extend the query string.
                var url = $"https://api.weather.com/v1/current?city={Uri.EscapeDataString(city ?? string.Empty)}";
                // Dispose the response on every path, including the throw below.
                using var response = await _httpClient.GetAsync(url);
                if (!response.IsSuccessStatusCode)
                {
                    // Formatting StatusCode alone prints the enum name
                    // ("ServiceUnavailable"), which a numeric scan of the message
                    // cannot match. Include the number and attach the status code
                    // itself so the retry handler can classify the failure.
                    throw new HttpRequestException(
                        $"Weather API returned {(int)response.StatusCode} {response.StatusCode}: {response.ReasonPhrase}",
                        null,
                        response.StatusCode);
                }
                var json = await response.Content.ReadAsStringAsync();
                return JsonSerializer.Deserialize<WeatherData>(json);
            },
            operationName: $"获取{city}天气数据"
        );
    }
}
结构化错误响应 📋
标准化的错误响应格式
json
{
"success": false,
"error": {
"code": "VALIDATION_1735123456ABC_DEF789",
"category": "validation",
"level": "error",
"message": "参数验证失败:location参数不能为空",
"details": {
"field": "location",
"provided_value": null,
"expected_format": "非空字符串"
},
"retryable": false,
"timestamp": "2024-03-15T10:30:00.000Z"
},
"request_id": "req_abc123def456",
"execution_time_ms": 15,
"documentation": {
"error_reference": "https://docs.mcp.example.com/errors/validation",
"suggestions": [
"请提供有效的城市名称",
"支持中英文城市名,如:北京、Shanghai"
]
}
}
小结
错误处理的艺术在于:
🎯 核心原则
- 用户友好 - 错误信息要让用户看得懂
- 技术详细 - 日志要包含足够的技术细节
- 分层处理 - 不同层次处理不同类型的错误
- 智能重试 - 根据错误类型决定是否重试
- 标准格式 - 统一的错误响应格式
💡 实践要点
- 建立完整的错误分类体系
- 实现智能的重试机制
- 记录详细的错误日志
- 提供友好的用户错误信息
- 设置合适的告警机制
🎪 记住:好的错误处理不是避免所有错误,而是让错误变得可控、可理解、可解决。
下一节:性能优化技巧 - 让你的MCP服务器飞起来