
可观测性让你能够回答在构建系统时未曾预料到的问题。OpenTelemetry 为跨语言、跨厂商的埋点(instrumentation)提供了统一标准。
三大支柱
- 追踪(Traces):请求在服务间的路径(延迟、因果关系)
- 指标(Metrics):聚合数据(P99、错误率、吞吐量)
- 日志(Logs):带有上下文的离散事件
力量来自关联:指标飙升 → 失败的追踪 → 解释原因的日志。

初始化追踪器(Go)
// initTracer builds an OTLP/gRPC span exporter, wraps it in a batching
// TracerProvider and installs that provider as the global default.
//
// The exporter targets "otel-collector:4317" over a plaintext connection.
// Sampling is parent-based with a 10% trace-ID ratio for root spans. The
// provider's resource identifies the service as "order-service" version
// "1.2.0".
//
// The provider is returned so the caller can flush and shut it down on exit.
// If the exporter cannot be created, initTracer returns nil and that error,
// and the global tracer provider is left untouched.
func initTracer(ctx context.Context) (*sdktrace.TracerProvider, error) {
	exporter, err := otlptracegrpc.New(ctx,
		otlptracegrpc.WithEndpoint("otel-collector:4317"),
		otlptracegrpc.WithInsecure(),
	)
	if err != nil {
		// Do not hand an unusable exporter to the batcher; surface the failure.
		return nil, err
	}

	res := resource.NewWithAttributes(
		semconv.SchemaURL,
		semconv.ServiceName("order-service"),
		semconv.ServiceVersion("1.2.0"),
	)
	tp := sdktrace.NewTracerProvider(
		sdktrace.WithBatcher(exporter),
		sdktrace.WithSampler(sdktrace.ParentBased(sdktrace.TraceIDRatioBased(0.1))),
		sdktrace.WithResource(res),
	)
	otel.SetTracerProvider(tp)
	return tp, nil
}

为函数添加埋点(instrumentation)
// tracer is the package-level tracer, obtained from the global provider under
// the instrumentation name "order-service".
var tracer = otel.Tracer("order-service")

// CreateOrder stores the order described by req through s.db and returns it
// converted with orderToProto.
//
// The call runs inside a "CreateOrder" span that carries the customer ID and
// item count from the start. On success the span also gets the new order's ID.
// On a storage error the error is recorded on the span, the span status is set
// to Error, and the error is returned unchanged.
func (s *service) CreateOrder(ctx context.Context, req *pb.Request) (*pb.Order, error) {
	startAttrs := trace.WithAttributes(
		attribute.String("customer.id", req.CustomerId),
		attribute.Int("items.count", len(req.Items)),
	)
	ctx, span := tracer.Start(ctx, "CreateOrder", startAttrs)
	defer span.End()

	created, dbErr := s.db.CreateOrder(ctx, req)
	if dbErr != nil {
		span.RecordError(dbErr)
		span.SetStatus(codes.Error, "database error")
		return nil, dbErr
	}

	orderID := attribute.String("order.id", created.ID)
	span.SetAttributes(orderID)
	return orderToProto(created), nil
}

自定义指标
// initMetrics creates a Prometheus-backed meter named "order-service" and
// initialises the package-level instruments orderCounter ("orders_total") and
// orderLatency ("order_duration_seconds", explicit bucket boundaries from 1ms
// to 5s).
//
// The function has no error return, so failures are reported through the
// OpenTelemetry global error handler (otel.Handle) instead of being dropped.
// If the Prometheus exporter cannot be created, the function returns early and
// the instruments are left unassigned.
//
// NOTE(review): the MeterProvider built here is neither registered globally
// nor retained, so it cannot be shut down later — confirm that is intended.
// NOTE(review): NewMeterProvider/WithReader belong to the SDK metric package,
// while WithDescription/WithExplicitBucketBoundaries belong to the API metric
// package; both are spelled `metric` here — confirm the import aliases.
func initMetrics() {
	exporter, err := prometheus.New()
	if err != nil {
		// A missing reader makes the provider unusable; report and stop.
		otel.Handle(err)
		return
	}

	meter := metric.NewMeterProvider(metric.WithReader(exporter)).Meter("order-service")

	orderCounter, err = meter.Int64Counter("orders_total",
		metric.WithDescription("Total orders created"))
	if err != nil {
		otel.Handle(err)
	}

	orderLatency, err = meter.Float64Histogram("order_duration_seconds",
		metric.WithExplicitBucketBoundaries(0.001, 0.01, 0.1, 0.5, 1.0, 5.0))
	if err != nil {
		otel.Handle(err)
	}
}
日志-追踪关联
// logWithTrace writes msg at Info level through logger, adding "trace_id" and
// "span_id" fields when ctx carries a valid span context.
//
// The check is on span-context validity rather than IsRecording, so log lines
// from unsampled requests still carry the IDs needed for correlation.
//
// The caller's fields are copied before the IDs are appended, so a slice
// passed as fields... is never written to.
func logWithTrace(ctx context.Context, logger *zap.Logger, msg string, fields ...zap.Field) {
	sc := trace.SpanFromContext(ctx).SpanContext()
	if !sc.IsValid() {
		logger.Info(msg, fields...)
		return
	}

	// Fresh backing array: appending to fields directly could overwrite spare
	// capacity in a slice owned by the caller.
	enriched := make([]zap.Field, 0, len(fields)+2)
	enriched = append(enriched, fields...)
	enriched = append(enriched,
		zap.String("trace_id", sc.TraceID().String()),
		zap.String("span_id", sc.SpanID().String()),
	)
	logger.Info(msg, enriched...)
}
OTel Collector 配置
# OpenTelemetry Collector pipeline: receive OTLP over gRPC and HTTP, batch,
# then send traces to Jaeger and expose metrics for Prometheus to scrape.
receivers:
  otlp:
    protocols:
      grpc: {endpoint: 0.0.0.0:4317}
      http: {endpoint: 0.0.0.0:4318}
processors:
  batch: {timeout: 1s, send_batch_size: 1024}
exporters:
  # NOTE(review): recent Collector releases no longer ship the dedicated
  # `jaeger` exporter; Jaeger accepts OTLP natively, so an `otlp` exporter
  # pointed at jaeger:4317 is the usual replacement — confirm against the
  # Collector version in use.
  jaeger:
    endpoint: jaeger:14250
    tls: {insecure: true}
  prometheus:
    endpoint: "0.0.0.0:8889"
service:
  pipelines:
    traces:
      receivers: [otlp]
      processors: [batch]
      exporters: [jaeger]
    metrics:
      receivers: [otlp]
      processors: [batch]
      exporters: [prometheus]
可观测性是一项团队纪律。技术已经标准化,真正的工作在于为正确的事件添加埋点(instrumentation),并建立起阅读和使用这些数据的文化。
→ 使用 Base64 转换器 工具解码 Base64 格式的 trace ID。