const service = OrderQueryService.getDetails(orderId);trace.query("orders").where({ status: "paid" }); SELECT * FROM orders WHERE user_id = ? LIMIT 50;span.setAttribute("db.duration", 1240); if (cache.miss) await repository.findOrderItems();metric.histogram("order.p95.latency").record(1800); await Promise.all([fetchPayment(), fetchShipment()]);logger.warn("slow sql detected", queryHash); OrderQueryService.getDetails -> queryOrderSkuListdb.statement: SELECT sku_id, price, count FROM order_item apm.linkTrace(traceId, serviceName, endpoint);slowSql.count = slowSql.count + 1; pipeline.scan(productionTraffic, errorLogs);duration.breakdown.database = 0.67; rootCause.rank(["database", "cache", "network"]);agent.context.attach(observabilitySnapshot); const p95 = quantile(latencySamples, 0.95);return composeOrderDetails(order, items, user); ordersRepository.findById(orderId, { traceId });db.pool.waiting = connectionPool.pendingCount; trace.child("mysql.query").setStatus("slow");cache.hitRate("order.details").below(0.42); SELECT order_id,total,status FROM order_summary;agent.read(span.events).filter(e => e.error); service.map("checkout").edge("mysql-primary");slowSql.topK(3).groupBy("statement_hash"); if (latency.p95 > 1500) raiseFinding("P95");profiling.cpu.attach("OrderQueryService"); queryPlan.rows_examined = 184205;indexAdvisor.suggest(["idx_order_user_time"]); timeline.mark("db", 0, 1206).mark("app", 1206, 1800);log.pattern("timeout").count(328); span.link(logs, metrics, traces, profiles);rootCause = "database query fan-out"; agent.summary.write(performanceFindings);patch.plan("add composite index").estimateImpact(); observe.prod("order-service").window("15m");notify.channel("incident-room").preview();
Production Traffic: 1.24M Requests
P95 Latency: 1.8s
Database Time: 67%
Error Logs: 328 Events
Developer: 分析 OrderQueryService 在线上的实际运行情况
Coding Agent: 正在关联生产环境观测数据...
已完成生产环境分析
订单服务 P95 延迟达到 1.8s,
数据库查询占总耗时 67%,
发现 3 条高频慢 SQL。
主要性能瓶颈位于
OrderQueryService.getDetails()