1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# benchmark.py — unified benchmarking template
import time
import numpy as np
import onnxruntime as ort
import json
import platform
def benchmark_model(infer_fn, input_data, warmup=30, runs=200,
                    model_name="model", device_info=""):
    """Measure the latency of ``infer_fn`` on a fixed input and summarize it.

    Args:
        infer_fn: Inference callable. It is invoked as
            ``infer_fn(input_data)`` and its return value is discarded.
        input_data: Fixed input (e.g. a NumPy array) passed unchanged to
            every call.
        warmup: Number of untimed warm-up calls made before measuring.
        runs: Number of timed calls. Must be at least 1.
        model_name: Label stored under the ``"model"`` key.
        device_info: Label stored under the ``"device"`` key.

    Returns:
        A dict of plain Python values: latency statistics in milliseconds
        (``mean_ms``, ``median_ms``, ``p50_ms``, ``p90_ms``, ``p95_ms``,
        ``p99_ms``, ``min_ms``, ``max_ms``, ``std_ms``), ``fps``
        (1000 / mean latency), the ``runs`` and ``warmup`` counts, and the
        ``platform.platform()`` / ``platform.processor()`` strings.

    Raises:
        ValueError: If ``runs`` is smaller than 1, since no statistic can be
            computed without at least one sample.
    """
    # Reject an empty measurement up front instead of failing later with an
    # IndexError after the warm-up has already been paid for.
    if runs < 1:
        raise ValueError(f"runs must be >= 1, got {runs!r}")

    # Warm-up: keep one-time costs (first-call kernel compilation, cache
    # filling) out of the timed section.
    for _ in range(warmup):
        infer_fn(input_data)

    # Timed section: one wall-clock sample per call, in milliseconds.
    latencies = []
    for _ in range(runs):
        t0 = time.perf_counter()
        infer_fn(input_data)
        latencies.append((time.perf_counter() - t0) * 1000)

    latencies.sort()
    # Convert once so every NumPy reduction below reuses the same array.
    samples = np.asarray(latencies)

    def at_rank(fraction):
        # Percentiles are read straight from the sorted samples at index
        # int(runs * fraction); the clamp keeps the index inside the list.
        return float(latencies[min(int(runs * fraction), runs - 1)])

    mean_ms = float(np.mean(samples))

    return {
        "model": model_name,
        "device": device_info,
        "mean_ms": mean_ms,
        "median_ms": float(np.median(samples)),
        # p50 is the element at index runs // 2, so for an even number of
        # runs it can differ from median_ms (which averages the two middle
        # samples).
        "p50_ms": float(latencies[runs // 2]),
        "p90_ms": at_rank(0.90),
        "p95_ms": at_rank(0.95),
        "p99_ms": at_rank(0.99),
        "min_ms": float(latencies[0]),
        "max_ms": float(latencies[-1]),
        "std_ms": float(np.std(samples)),
        # Plain float like every other statistic; a zero mean latency is
        # reported as infinite throughput rather than raising.
        "fps": 1000.0 / mean_ms if mean_ms > 0 else float("inf"),
        "runs": runs,
        "warmup": warmup,
        "system": platform.platform(),
        "processor": platform.processor(),
    }
# Usage example
def test_ort_cpu():
    """Benchmark ``yolo26n.onnx`` with ONNX Runtime on the CPU provider.

    Creates an inference session limited to ``CPUExecutionProvider``, builds
    a random float32 array of shape (1, 3, 640, 640), and times inference
    through ``benchmark_model`` using its default warm-up and run counts.

    Returns:
        The result dict produced by ``benchmark_model``.
    """
    cpu_session = ort.InferenceSession(
        "yolo26n.onnx",
        providers=["CPUExecutionProvider"],
    )
    random_input = np.random.randn(1, 3, 640, 640).astype(np.float32)

    def run_once(batch):
        # Feed the array to the model input named "images".
        return cpu_session.run(None, {"images": batch})

    return benchmark_model(
        run_once,
        random_input,
        model_name="YOLO26n ONNX Runtime CPU",
        device_info="Intel i7-12700, 16GB DDR5",
    )
# Run the ONNX Runtime CPU benchmark and print its report as indented JSON.
result = test_ort_cpu()
report_json = json.dumps(result, indent=2)
print(report_json)
|