外观
NET461使用onnx模型
net6以上的好处理,主要net461需要纯手写
流程
- 初始化读取模型
- 入参适配
- 识别出参转换
添加库:Microsoft.ML.OnnxRuntime
核心
//结果解析
//计算过程涉及到图像预处理和模型输出的特性。
/*
*1. 模型输入和输出:
- YOLOv5模型通常接受固定大小的输入(这里是1216x1216)。
- 模型输出的坐标是相对于这个输入尺寸的归一化值(0到1之间)。
2. 图像预处理:
- 原始图像(3088x2064)被缩放以适应模型输入尺寸,同时保持宽高比。
- 缩放后可能会有填充(padding)以达到精确的输入尺寸。
3. 转换步骤:
a. 取消归一化:
- (output[0, i, 0], output[0, i, 1]) 是边界框中心的x和y坐标。
- (output[0, i, 2], output[0, i, 3]) 是边界框的宽和高。
b. 移除填充:
- 减去xPad和yPad移除在预处理时添加的填充。
c. 缩放回原始尺寸:
- 除以xGain和yGain将坐标从模型输入尺寸缩放回原始图像尺寸。
为什么点位要这么算:
1. 最重要的一点他是归一化 输出的[0]是中心点的x坐标,[1]是中心点的y坐标
2. [2],[3]分别是边界的宽高
所以 xMin = ([0] - [2]/2 - xPad) / xGain,即(x的中心坐标 - 宽度的一半 - 填充量)整体再除以缩放率,就得到了原图上的 x 坐标;y 坐标同理
*/
// Objectness threshold: boxes whose objectness score is at or below this are discarded.
public float Confidence { get; protected set; } = 0.20f;
// Joint threshold applied to objectness * class score after multiplication.
public float MulConfidence { get; protected set; } = 0.35f;
// IoU threshold used to suppress overlapping boxes during NMS.
public float Overlap { get; protected set; } = 0.45f;
/// <summary>
/// Decodes a raw YOLOv5 output tensor [1, numBoxes, 6] (cx, cy, w, h, objectness,
/// classScore) into detections in original-image pixel coordinates.
/// Fixes over the previous version:
///  - results was a plain List&lt;T&gt; mutated from inside Parallel.For (data race
///    that can corrupt the list or drop detections); a ConcurrentBag is used instead.
///  - the confThreshold parameter was accepted but never used; it now drives the
///    objectness filter (previously the Confidence field was read instead).
///  - the inner Parallel.For over at most one class column was pure overhead and
///    is replaced by plain loops.
/// </summary>
/// <param name="output">Raw model output, shape [1, numBoxes, 6].</param>
/// <param name="confThreshold">Objectness threshold below which boxes are skipped.</param>
/// <returns>Decoded detections (unsuppressed; run NMS afterwards).</returns>
private List<YoloV5Result> ParseOutput(Tensor<float> output, float confThreshold)
{
    var results = new ConcurrentBag<YoloV5Result>();
    var boxCount = output.Dimensions[1];        // number of candidate boxes
    var valuesPerBox = output.Dimensions[2];    // 6 = x, y, w, h, obj, class score
    var (ModelInputWidth, ModelInputHeight) = (1216, 1216);
    var (w, h) = (3088, 2064); // original image size (hard-coded for this camera/model)
    var (xGain, yGain) = (ModelInputWidth / (float)w, ModelInputHeight / (float)h);
    // NOTE(review): with independent x/y gains these pads are always 0; they only
    // become non-zero for letterbox (uniform-scale) preprocessing — confirm which
    // resize the preprocessing actually uses.
    var (xPad, yPad) = ((ModelInputWidth - w * xGain) / 2, (ModelInputHeight - h * yGain) / 2);
    Parallel.For(0, boxCount, (i) =>
    {
        if (output[0, i, 4] <= confThreshold) return; // drop low-objectness boxes
        // Fold objectness into each class score to get the joint confidence.
        for (int j = 5; j < valuesPerBox; j++)
            output[0, i, j] = output[0, i, j] * output[0, i, 4];
        for (int k = 5; k < valuesPerBox; k++)
        {
            if (output[0, i, k] <= MulConfidence) continue; // joint confidence too low
            // Convert center/size back to corner coordinates, undo padding,
            // and scale from model-input space to original-image space.
            float xMin = ((output[0, i, 0] - output[0, i, 2] / 2) - xPad) / xGain;
            float yMin = ((output[0, i, 1] - output[0, i, 3] / 2) - yPad) / yGain;
            float xMax = ((output[0, i, 0] + output[0, i, 2] / 2) - xPad) / xGain;
            float yMax = ((output[0, i, 1] + output[0, i, 3] / 2) - yPad) / yGain;
            // Keep the box inside the image bounds.
            xMin = Clamp(xMin, 0, w);
            yMin = Clamp(yMin, 0, h);
            xMax = Clamp(xMax, 0, w - 1);
            yMax = Clamp(yMax, 0, h - 1);
            results.Add(new YoloV5Result()
            {
                ClassName = _classes[0], // single-class model ("mark")
                Confidence = output[0, i, k],
                Box = new RectangleF(xMin, yMin, xMax - xMin, yMax - yMin)
            });
        }
    });
    return results.ToList();
}
// Restricts value to the inclusive range [min, max].
float Clamp(float value, float min, float max)
{
    if (value < min) return min;
    if (value > max) return max;
    return value;
}
完整
/// <summary>
/// Minimal YOLOv5 ONNX inference harness for .NET Framework 4.6.1:
/// loads a model, preprocesses a bitmap into an NCHW tensor, decodes the
/// output boxes, applies NMS and draws results with OpenCvSharp.
/// Fixes over the previous version: thread-unsafe List.Add inside Parallel.For,
/// undisposed session/results/bitmaps, NCHW width/height read in swapped order,
/// tensor allocated {1,3,W,H} while indexed [0,c,y,x], a redundant tensor copy,
/// double Intersect computation and possible NaN in IoU.
/// </summary>
public class Yolov5onnx
{
    /// <summary>Name of the model's input node, read from session metadata.</summary>
    public string InputName { get; set; }
    /// <summary>Model input width in pixels (NCHW dimension [3]).</summary>
    public int ModelInputWidth { get; set; }
    /// <summary>Model input height in pixels (NCHW dimension [2]).</summary>
    public int ModelInputHeight { get; set; }

    /// <summary>
    /// End-to-end smoke test: runs new.onnx on locateorigin.jpg and shows the
    /// surviving detections in a resizable OpenCV window.
    /// </summary>
    public void TestModel()
    {
        // Load the ONNX model; session, options and run results wrap native
        // resources, so dispose them when the method ends.
        string modelPath = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "new.onnx");
        using var sessionOptions = new SessionOptions();
        sessionOptions.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL;
        using var session = new InferenceSession(modelPath, sessionOptions);
        GetInputDetails(session);

        // Load and preprocess the image.
        string imagePath = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "locateorigin.jpg");
        using var image = new Bitmap(imagePath);
        // PreprocessImage already returns a correctly shaped DenseTensor; the old
        // ToArray()/new DenseTensor round-trip was a needless full copy.
        var inputTensor = PreprocessImage(image);
        var inputs = new List<NamedOnnxValue> { NamedOnnxValue.CreateFromTensor(InputName, inputTensor) };
        using var results = session.Run(inputs);
        var classs = new string[] { "mark" };

        // Postprocess: decode raw boxes, then drop overlapping duplicates.
        var output = results.First().AsTensor<float>();
        var detections = PostprocessDetections(output, image, classs);
        detections = NonMaxSuppression(detections).ToList();

        // Not disposed on purpose: the OpenCV window keeps displaying this Mat.
        Mat mat = new Mat(imagePath);
        foreach (var detection in detections)
        {
            Console.WriteLine($"Class: {detection.Class}, Confidence: {detection.Confidence}, Bounds: {detection.Bounds}");
            Cv2.Rectangle(mat, new OpenCvSharp.Rect((int)detection.Bounds.X, (int)detection.Bounds.Y, (int)detection.Bounds.Width, (int)detection.Bounds.Height), Scalar.Red, 2);
            Cv2.PutText(mat, $"Conf: {detection.Confidence:F2}",
                new OpenCvSharp.Point(detection.Bounds.X, detection.Bounds.Y - 10),
                HersheyFonts.HersheySimplex, 0.9, Scalar.Red, 2);
        }
        Cv2.NamedWindow("a", WindowFlags.Normal);
        Cv2.ResizeWindow("a", 1500, 800);
        Cv2.ImShow("a", mat);
    }

    /// <summary>Reads the input node name and spatial size from session metadata.</summary>
    private void GetInputDetails(InferenceSession session)
    {
        IReadOnlyDictionary<string, NodeMetadata> inputMeta = session.InputMetadata;
        InputName = inputMeta.Keys.First();
        var dimensions = inputMeta[InputName].Dimensions;
        // NCHW layout: dimensions[2] is height, dimensions[3] is width. The
        // previous code assigned width from [2]; it went unnoticed only because
        // this model's input is square (1216x1216).
        (ModelInputHeight, ModelInputWidth) = (dimensions[2], dimensions[3]);
    }

    /// <summary>
    /// Stretch-resizes the bitmap to the model input size (no letterboxing) and
    /// packs it into a normalized RGB NCHW tensor.
    /// </summary>
    Tensor<float> PreprocessImage(Bitmap image)
    {
        bool needsResize = image.Width != ModelInputWidth || image.Height != ModelInputHeight;
        Bitmap resized = needsResize
            ? new Bitmap(image, new System.Drawing.Size(ModelInputWidth, ModelInputHeight))
            : image;
        try
        {
            // NCHW shape is {1, 3, H, W}; the previous code allocated {1, 3, W, H}
            // while indexing [0, c, y, x] (identical only for square inputs).
            var input = new DenseTensor<float>(new[] { 1, 3, ModelInputHeight, ModelInputWidth });
            for (int y = 0; y < resized.Height; y++)
            {
                for (int x = 0; x < resized.Width; x++)
                {
                    // GetPixel is slow but acceptable for a test harness.
                    var color = resized.GetPixel(x, y);
                    input[0, 0, y, x] = ConvertToFloat16Range(color.R / 255.0f);
                    input[0, 1, y, x] = ConvertToFloat16Range(color.G / 255.0f);
                    input[0, 2, y, x] = ConvertToFloat16Range(color.B / 255.0f);
                }
            }
            return input;
        }
        finally
        {
            // Dispose the intermediate bitmap we created (the original leaked it);
            // never dispose the caller-owned source image.
            if (needsResize) resized.Dispose();
        }
    }

    /// <summary>Clamps a float32 into float16's representable range.</summary>
    static float ConvertToFloat16Range(float value)
    {
        // For already-normalized 0..1 pixel values this is a no-op safeguard.
        const float maxFloat16 = 65504.0f;
        return Math.Max(-maxFloat16, Math.Min(maxFloat16, value));
    }

    /// <summary>
    /// Decodes the raw YOLOv5 output [1, numBoxes, 6] into detections in
    /// original-image coordinates. Uses a ConcurrentBag because the previous
    /// version called List.Add from inside Parallel.For (data race).
    /// </summary>
    List<Detection> PostprocessDetections(Tensor<float> output, Image image, string[] classs)
    {
        var results = new ConcurrentBag<Detection>();
        float Confidence = 0.20f;    // objectness threshold
        float MulConfidence = 0.35f; // joint objectness * class-score threshold
        var boxCount = output.Dimensions[1];
        var valuesPerBox = output.Dimensions[2]; // 6
        var (w, h) = (image.Width, image.Height); // original image size
        var (xGain, yGain) = (ModelInputWidth / (float)w, ModelInputHeight / (float)h);
        // NOTE(review): with independent x/y gains these pads are always 0; they
        // only matter for letterboxed (uniform-scale) preprocessing.
        var (xPad, yPad) = ((ModelInputWidth - w * xGain) / 2, (ModelInputHeight - h * yGain) / 2);
        Parallel.For(0, boxCount, (i) =>
        {
            if (output[0, i, 4] <= Confidence) return; // drop low-objectness boxes
            // Fold objectness into each class score (plain loops: the ranges are
            // tiny, so the old nested Parallel.For was pure overhead).
            for (int j = 5; j < valuesPerBox; j++)
                output[0, i, j] = output[0, i, j] * output[0, i, 4];
            for (int k = 5; k < valuesPerBox; k++)
            {
                if (output[0, i, k] <= MulConfidence) continue;
                // Center/size -> corners, undo padding, scale back to original size.
                float xMin = ((output[0, i, 0] - output[0, i, 2] / 2) - xPad) / xGain;
                float yMin = ((output[0, i, 1] - output[0, i, 3] / 2) - yPad) / yGain;
                float xMax = ((output[0, i, 0] + output[0, i, 2] / 2) - xPad) / xGain;
                float yMax = ((output[0, i, 1] + output[0, i, 3] / 2) - yPad) / yGain;
                // Keep the box inside the image bounds.
                xMin = Clamp(xMin, 0, w);
                yMin = Clamp(yMin, 0, h);
                xMax = Clamp(xMax, 0, w - 1);
                yMax = Clamp(yMax, 0, h - 1);
                results.Add(new Detection()
                {
                    Class = classs[0], // single-class model ("mark")
                    Confidence = output[0, i, k],
                    Bounds = new RectangleF(xMin, yMin, xMax - xMin, yMax - yMin)
                });
            }
        });
        return results.ToList();
    }

    /// <summary>Restricts value to the inclusive range [min, max].</summary>
    float Clamp(float value, float min, float max) =>
        (value < min) ? min :
        (value > max) ? max : value;

    /// <summary>
    /// Greedy per-class non-maximum suppression. Sorts each class group by
    /// confidence once up front instead of re-sorting on every iteration.
    /// </summary>
    private IEnumerable<Detection> NonMaxSuppression(List<Detection> predictions, float iouThreshold = 0.45f)
    {
        var results = new List<Detection>();
        foreach (var classGroup in predictions.GroupBy(p => p.Class).OrderByDescending(g => g.Key))
        {
            var candidates = classGroup.OrderByDescending(r => r.Confidence).ToList();
            while (candidates.Count > 0)
            {
                var best = candidates[0];
                results.Add(best);
                // Keep only candidates that do not overlap the kept box too much.
                candidates = candidates.Skip(1)
                    .Where(r => CalculateIoU(best.Bounds, r.Bounds) < iouThreshold)
                    .ToList();
            }
        }
        return results;
    }

    /// <summary>Intersection-over-union of two boxes; 0 for degenerate unions.</summary>
    private float CalculateIoU(RectangleF box1, RectangleF box2)
    {
        // Compute the intersection once (the original called Intersect twice).
        var intersection = RectangleF.Intersect(box1, box2);
        var intersectionArea = intersection.Width * intersection.Height;
        var unionArea = box1.Width * box1.Height + box2.Width * box2.Height - intersectionArea;
        // Guard zero-area boxes so we return 0 instead of NaN (0/0).
        return unionArea <= 0 ? 0 : intersectionArea / unionArea;
    }
}
// One decoded detection produced by PostprocessDetections.
class Detection
{
// Bounding box in original-image pixel coordinates (x, y, width, height).
public RectangleF Bounds { get; set; }
// Predicted class label (e.g. "mark").
public string Class { get; set; }
// Joint confidence (objectness * class score).
public float Confidence { get; set; }
}优化版本
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Runtime.InteropServices;
using System.Diagnostics;
using System.IO;
namespace WpfApp1
{
/// <summary>
/// ONNX Runtime detector supporting YOLOv5 and YOLOv8/v11 output layouts.
/// The architecture is inferred from the model's output shape; labels and input
/// size are read from model metadata.
/// Fixes over the previous version: the shared static SessionOptions singleton
/// was mutated (execution providers appended) on every construction, so a second
/// detector corrupted it — a fresh options object is now created per instance;
/// the letterboxed bitmap produced in preprocessing was leaked (plus copied a
/// second time); the output metadata lookup hard-coded "output0"; IoU could
/// divide by a zero union.
/// </summary>
public class YoloDetector : IDisposable
{
    private readonly InferenceSession _session;
    private string[] _labels;
    private Size _inputSize;
    private bool _disposed;
    private YoloArchitecture _architecture;

    // Fresh default options per instance. The old `static readonly SessionOptions`
    // was appended-to in the constructor, so constructing two detectors reused and
    // re-mutated the same (already consumed) options object.
    private static SessionOptions CreateDefaultSessionOptions() => new SessionOptions
    {
        EnableMemoryPattern = true,
        ExecutionMode = ExecutionMode.ORT_PARALLEL,
        GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL
    };

    /// <summary>
    /// Creates the detector from an .onnx file. Tries CUDA first and falls back
    /// to CPU. Caller-supplied options are used as-is (providers are appended).
    /// </summary>
    /// <param name="modelPath">Path to the ONNX model file.</param>
    /// <param name="sessionOptions">Optional pre-configured session options.</param>
    public YoloDetector(string modelPath, SessionOptions sessionOptions = null)
    {
        if (string.IsNullOrEmpty(modelPath))
            throw new ArgumentNullException(nameof(modelPath));
        if (!File.Exists(modelPath))
            throw new FileNotFoundException("模型文件未找到!", modelPath);
        try
        {
            sessionOptions ??= CreateDefaultSessionOptions();
            // Prefer GPU; fall back to CPU if the CUDA provider is unavailable.
            try
            {
                sessionOptions.AppendExecutionProvider_CUDA(0);
                Debug.WriteLine("CUDA 启用成功");
            }
            catch (Exception ex)
            {
                Debug.WriteLine($"CUDA 初始化失败: {ex.Message}. 使用CPU.");
                sessionOptions.AppendExecutionProvider_CPU();
            }
            _session = new InferenceSession(modelPath, sessionOptions);
            InitializeFromMetadata();
        }
        catch (Exception ex)
        {
            throw new InvalidOperationException("初始化失败!", ex);
        }
    }

    /// <summary>
    /// Reads architecture, input size and class labels from the model metadata.
    /// </summary>
    private void InitializeFromMetadata()
    {
        // Prefer the conventional "output0" name but fall back to the first
        // declared output (YOLOv5 exports frequently name it "output" instead,
        // which made the old hard-coded lookup throw).
        var outputMeta = _session.OutputMetadata.TryGetValue("output0", out var named)
            ? named
            : _session.OutputMetadata.First().Value;
        // v5 output is [1, boxes, 4+1+classes] (last dim 6 for one class);
        // v8/v11 is [1, 4+classes, boxes], so the last dim is the box count.
        _architecture = outputMeta.Dimensions[2] == 6
            ? YoloArchitecture.YoloV5
            : YoloArchitecture.YoloV8Or11;

        var inputDims = _session.InputMetadata.First().Value.Dimensions;
        // NCHW layout: inputDims[2] is height, inputDims[3] is width. The old code
        // used them swapped; harmless for square inputs only.
        _inputSize = new Size(inputDims[3], inputDims[2]);

        var metadata = _session.ModelMetadata.CustomMetadataMap;
        if (!metadata.TryGetValue("names", out var names))
            throw new InvalidOperationException("模型元数据缺少'names'字段");
        // Parses "{0: 'mark', 1: 'other'}"-style dictionaries exported by
        // ultralytics. NOTE(review): a label containing ',' or ':' breaks this
        // simple split — confirm labels are plain identifiers.
        _labels = names.Trim('{', '}')
                       .Split(',')
                       .Select(name => name.Split(':')[1].Trim('\'', ' ', '"'))
                       .ToArray();
    }

    /// <summary>
    /// Letterboxes the image to the model input size (aspect-preserving resize on
    /// a gray canvas) and packs it into a normalized RGB NCHW tensor.
    /// </summary>
    private Tensor<float> PreprocessImage(Image image)
    {
        // ResizeImage always returns a new Bitmap we own: dispose it when done.
        // (The old code leaked it and additionally copied it into a second Bitmap.)
        using var resized = (Bitmap)ResizeImage(image, _inputSize.Width, _inputSize.Height);
        var tensor = new DenseTensor<float>(new[] { 1, 3, _inputSize.Height, _inputSize.Width });
        for (int y = 0; y < resized.Height; y++)
        {
            for (int x = 0; x < resized.Width; x++)
            {
                // GetPixel is slow but simple; acceptable at this image size.
                var pixel = resized.GetPixel(x, y);
                tensor[0, 0, y, x] = pixel.R / 255f;
                tensor[0, 1, y, x] = pixel.G / 255f;
                tensor[0, 2, y, x] = pixel.B / 255f;
            }
        }
        return tensor;
    }

    /// <summary>
    /// Runs detection on an image and returns NMS-filtered predictions in
    /// original-image coordinates, dispatching on the detected architecture.
    /// </summary>
    public IList<Prediction> Detect(Image image, float confidenceThreshold = 0.5f, float iouThreshold = 0.5f)
    {
        return _architecture switch
        {
            YoloArchitecture.YoloV5 => DetectV5(image, confidenceThreshold, iouThreshold),
            YoloArchitecture.YoloV8Or11 => DetectV11(image, confidenceThreshold, iouThreshold),
            _ => throw new ArgumentException("不支持的架构")
        };
    }

    // YOLOv5 inference: preprocess, run, decode [1, boxes, 5+classes], NMS.
    private IList<Prediction> DetectV5(Image image, float confidenceThreshold = 0.5f, float iouThreshold = 0.5f)
    {
        var inputTensor = PreprocessImage(image);
        var inputs = new List<NamedOnnxValue>
        {
            NamedOnnxValue.CreateFromTensor(_session.InputMetadata.First().Key, inputTensor)
        };
        using var results = _session.Run(inputs);
        var output = results.FirstOrDefault()?.AsTensor<float>();
        if (output == null) throw new Exception("推理失败");
        var predictions = ParseOutputV5(output, image.Size, confidenceThreshold);
        return ApplyNms(predictions, iouThreshold);
    }

    /// <summary>
    /// Decodes YOLOv5 output [1, numBoxes, 5+numClasses]: per box the layout is
    /// (cx, cy, w, h, objectness, classScores...). Thread-safe via ConcurrentBag.
    /// </summary>
    private List<Prediction> ParseOutputV5(Tensor<float> output, Size originalSize, float confidenceThreshold)
    {
        var predictions = new ConcurrentBag<Prediction>();
        int numClasses = output.Dimensions[2] - 5; // minus (x, y, w, h, obj)
        int numBoxes = output.Dimensions[1];
        Parallel.For(0, numBoxes, i =>
        {
            float confidence = output[0, i, 4];
            if (confidence >= confidenceThreshold)
            {
                // Pick the best class for this box.
                float maxClassScore = 0;
                int maxClassIndex = 0;
                for (int j = 0; j < numClasses; j++)
                {
                    float score = output[0, i, 5 + j];
                    if (score > maxClassScore)
                    {
                        maxClassScore = score;
                        maxClassIndex = j;
                    }
                }
                // Joint confidence = objectness * best class score.
                float finalScore = confidence * maxClassScore;
                if (finalScore >= confidenceThreshold)
                {
                    float x = output[0, i, 0];
                    float y = output[0, i, 1];
                    float w = output[0, i, 2];
                    float h = output[0, i, 3];
                    // Center/size -> top-left corner box, mapped back to the
                    // original (pre-letterbox) image coordinates.
                    var rect = ConvertToOriginalSize(
                        new RectangleF(x - w / 2, y - h / 2, w, h),
                        originalSize,
                        _inputSize
                    );
                    predictions.Add(new Prediction
                    {
                        Rectangle = rect,
                        Confidence = finalScore,
                        Label = _labels[maxClassIndex]
                    });
                }
            }
        });
        return predictions.ToList();
    }

    // YOLOv8/v11 inference: preprocess, run, decode [1, 4+classes, boxes], NMS.
    private IList<Prediction> DetectV11(Image image, float confidenceThreshold = 0.5f, float iouThreshold = 0.5f)
    {
        var inputTensor = PreprocessImage(image);
        var inputMetadata = _session.InputMetadata.First();
        var inputs = new List<NamedOnnxValue>
        {
            NamedOnnxValue.CreateFromTensor(inputMetadata.Key, inputTensor)
        };
        using var results = _session.Run(inputs);
        var output = results.FirstOrDefault()?.AsTensor<float>();
        if (output == null) throw new Exception("推理失败");
        var predictions = ParseOutput(output, image.Size, confidenceThreshold);
        return ApplyNms(predictions, iouThreshold);
    }

    /// <summary>
    /// Decodes YOLOv8/v11 output [1, 4+numClasses, numDetections]; note the
    /// transposed layout relative to v5 (values indexed as [0, channel, box]).
    /// </summary>
    private List<Prediction> ParseOutput(Tensor<float> output, Size originalSize, float confidenceThreshold)
    {
        var predictions = new ConcurrentBag<Prediction>();
        int numClasses = output.Dimensions[1] - 4;  // channels minus 4 bbox coords
        int numDetections = output.Dimensions[2];   // candidate box count (e.g. 33600)
        Parallel.For(0, numDetections, i =>
        {
            // Find the best-scoring class for this detection (no separate
            // objectness channel in v8/v11).
            float maxConfidence = float.MinValue;
            int bestClassId = -1;
            for (int j = 0; j < numClasses; j++)
            {
                float confidence = output[0, 4 + j, i];
                if (confidence > maxConfidence)
                {
                    maxConfidence = confidence;
                    bestClassId = j;
                }
            }
            if (maxConfidence >= confidenceThreshold)
            {
                // Box is stored channel-wise as (cx, cy, w, h).
                var x = output[0, 0, i];
                var y = output[0, 1, i];
                var w = output[0, 2, i];
                var h = output[0, 3, i];
                var rect = ConvertToOriginalSize(new RectangleF(x - w / 2, y - h / 2, w, h),
                    originalSize, _inputSize);
                predictions.Add(new Prediction
                {
                    Rectangle = rect,
                    Confidence = maxConfidence,
                    Label = _labels[bestClassId]
                });
            }
        });
        return predictions.ToList();
    }

    /// <summary>Greedy non-maximum suppression over all predictions.</summary>
    private List<Prediction> ApplyNms(IEnumerable<Prediction> predictions, float iouThreshold)
    {
        var results = new List<Prediction>();
        var ordered = predictions.OrderByDescending(p => p.Confidence).ToList();
        while (ordered.Count > 0)
        {
            var current = ordered[0];
            results.Add(current);
            ordered.RemoveAt(0);
            // Remove everything overlapping the kept box beyond the threshold;
            // iterate backwards so RemoveAt doesn't shift unvisited indices.
            for (int i = ordered.Count - 1; i >= 0; i--)
            {
                var iou = CalculateIoU(current.Rectangle, ordered[i].Rectangle);
                if (iou > iouThreshold) ordered.RemoveAt(i);
            }
        }
        return results;
    }

    /// <summary>
    /// Aspect-preserving resize onto a gray canvas of exactly the target size
    /// (letterboxing). The caller owns and must dispose the returned bitmap.
    /// </summary>
    private static Image ResizeImage(Image image, int targetWidth, int targetHeight)
    {
        // Uniform scale so the whole image fits inside the target.
        float scale = Math.Min((float)targetWidth / image.Width, (float)targetHeight / image.Height);
        int newWidth = (int)(image.Width * scale);
        int newHeight = (int)(image.Height * scale);
        var resized = new Bitmap(targetWidth, targetHeight);
        using (var graphics = Graphics.FromImage(resized))
        {
            // Gray padding around the centered image.
            graphics.FillRectangle(Brushes.Gray, 0, 0, targetWidth, targetHeight);
            int x = (targetWidth - newWidth) / 2;
            int y = (targetHeight - newHeight) / 2;
            graphics.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.HighQualityBicubic;
            graphics.DrawImage(image, x, y, newWidth, newHeight);
        }
        return resized;
    }

    /// <summary>
    /// Maps a box from model-input (letterboxed) coordinates back to original
    /// image coordinates by removing the padding and undoing the uniform scale.
    /// </summary>
    private static RectangleF ConvertToOriginalSize(RectangleF rect, Size originalSize, Size modelInputSize)
    {
        float scale = Math.Min((float)modelInputSize.Width / originalSize.Width,
                               (float)modelInputSize.Height / originalSize.Height);
        // Same padding ResizeImage added when centering the scaled image.
        float padX = (modelInputSize.Width - originalSize.Width * scale) / 2;
        float padY = (modelInputSize.Height - originalSize.Height * scale) / 2;
        return new RectangleF(
            (rect.X - padX) / scale,
            (rect.Y - padY) / scale,
            rect.Width / scale,
            rect.Height / scale
        );
    }

    /// <summary>Intersection-over-union of two boxes; 0 for degenerate unions.</summary>
    private static float CalculateIoU(RectangleF a, RectangleF b)
    {
        float intersectLeft = Math.Max(a.Left, b.Left);
        float intersectTop = Math.Max(a.Top, b.Top);
        float intersectRight = Math.Min(a.Right, b.Right);
        float intersectBottom = Math.Min(a.Bottom, b.Bottom);
        // No overlap at all.
        if (intersectRight < intersectLeft || intersectBottom < intersectTop)
            return 0;
        float intersectionArea = (intersectRight - intersectLeft) * (intersectBottom - intersectTop);
        float areaA = a.Width * a.Height;
        float areaB = b.Width * b.Height;
        float unionArea = areaA + areaB - intersectionArea;
        // Guard zero-area boxes so we return 0 instead of NaN (0/0).
        return unionArea <= 0 ? 0 : intersectionArea / unionArea;
    }

    // Standard dispose pattern; the session wraps native ORT resources.
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    protected virtual void Dispose(bool disposing)
    {
        if (!_disposed)
        {
            if (disposing)
            {
                _session?.Dispose();
            }
            _disposed = true;
        }
    }

    ~YoloDetector()
    {
        Dispose(false);
    }
}
// One detection returned by YoloDetector.Detect.
public class Prediction
{
// Bounding box in original-image pixel coordinates.
public RectangleF Rectangle { get; set; }
// Confidence score (v5: objectness * class score; v8/v11: best class score).
public float Confidence { get; set; }
// Class label resolved from the model's "names" metadata.
public string Label { get; set; }
}
// Output-tensor layout family, inferred from the model's output shape:
// YoloV5 = [1, boxes, 5+classes]; YoloV8Or11 = [1, 4+classes, boxes].
public enum YoloArchitecture
{
YoloV5,
YoloV8Or11
}
}