Skip to content

NET461使用onnx模型

约 2901 字大约 10 分钟

NET

2024-10-17

NET 6 以上的版本处理起来比较方便,NET 4.6.1 则主要需要纯手写适配

流程

  1. 初始化读取模型
  2. 入参适配
  3. 识别出参转换

添加库:Microsoft.ML.OnnxRuntime

核心

//结果解析
//计算过程涉及到图像预处理和模型输出的特性。
/*
*1. 模型输入和输出:
    - YOLOv5模型通常接受固定大小的输入(这里是1216x1216)。
    - 模型输出的坐标是相对于这个输入尺寸(1216x1216)的像素值,不是0到1的归一化值(代码中直接减去填充再除以缩放率即可还原)。
2. 图像预处理:
    - 原始图像(3088x2064)被缩放以适应模型输入尺寸,同时保持宽高比。
    - 缩放后可能会有填充(padding)以达到精确的输入尺寸。
3. 转换步骤:  
    a. 取消归一化:
	    - (output[0, i, 0], output[0, i, 1]) 是边界框中心的x和y坐标。
	    - (output[0, i, 2], output[0, i, 3]) 是边界框的宽和高。
    b. 移除填充:
	    - 减去xPad和yPad移除在预处理时添加的填充。
    c. 缩放回原始尺寸:
	    - 除以xGain和yGain将坐标从模型输入尺寸缩放回原始图像尺寸。

为什么点位要这么算:
	1. 最重要的一点:输出坐标是基于模型输入尺寸的像素值,输出的[0]是中心点的x坐标,[1]是中心点的y坐标
	2. [2],[3]分别是边界的宽高
	所以xMin = ([0]-[2]/2-xPad)/xGain,即(x的中心坐标-宽度的一半-填充偏移量)整体再除以缩放率就得到了原图上的x坐标,y坐标同理
*/
// Minimum objectness score: candidate boxes at or below this are discarded early.
public float Confidence { get; protected set; } = 0.20f;
// Minimum combined score (objectness * class score) for a box to become a detection.
public float MulConfidence { get; protected set; } = 0.35f;
// IoU overlap threshold — not referenced in this snippet; presumably the NMS threshold, confirm against caller.
public float Overlap { get; protected set; } = 0.45f;
// Parses the raw YOLOv5 output into detection results.
// Output layout: [1, numBoxes, 5 + numClasses] = (cx, cy, w, h, objectness, class scores...).
// NOTE(review): the confThreshold parameter is never used — filtering relies on the
// Confidence/MulConfidence properties; confirm whether that is intended.
private List<YoloV5Result> ParseOutput(Tensor<float> output, float confThreshold)
{
    var results = new List<YoloV5Result>();
    // List<T>.Add is not thread-safe; the original called it straight from
    // Parallel.For, which can lose or corrupt entries. Guard it with a lock.
    var gate = new object();

    var ModelOutputCount = output.Dimensions[1];      // number of candidate boxes
    var ModelOutputDimensions = output.Dimensions[2]; // values per box (6 => 1 class)

    var (ModelInputWidth, ModelInputHeight) = (1216, 1216); // model input size

    var (w, h) = (3088, 2064); // original image size
    var (xGain, yGain) = (ModelInputWidth / (float)w, ModelInputHeight / (float)h); // x, y gains
    var (xPad, yPad) = ((ModelInputWidth - w * xGain) / 2, (ModelInputHeight - h * yGain) / 2); // left, top pads

    Parallel.For(0, ModelOutputCount, (i) => // one iteration per candidate box
    {
        if (output[0, i, 4] <= Confidence) return; // drop low-objectness boxes early

        // Multiply class scores by objectness. Plain loops: the original nested
        // Parallel.For over a handful of elements, which only added overhead.
        for (int j = 5; j < ModelOutputDimensions; j++)
            output[0, i, j] = output[0, i, j] * output[0, i, 4];

        for (int k = 5; k < ModelOutputDimensions; k++)
        {
            if (output[0, i, k] <= MulConfidence) continue; // weak combined score

            // Convert centre/size (model-input pixels) to corner coordinates in the
            // original image: remove letterbox padding, then divide by the gain.
            float xMin = ((output[0, i, 0] - output[0, i, 2] / 2) - xPad) / xGain;
            float yMin = ((output[0, i, 1] - output[0, i, 3] / 2) - yPad) / yGain;
            float xMax = ((output[0, i, 0] + output[0, i, 2] / 2) - xPad) / xGain;
            float yMax = ((output[0, i, 1] + output[0, i, 3] / 2) - yPad) / yGain;

            // Clamp into the image (the original mixed "w - 0" and "w - 1" bounds).
            xMin = Clamp(xMin, 0, w - 1);
            yMin = Clamp(yMin, 0, h - 1);
            xMax = Clamp(xMax, 0, w - 1);
            yMax = Clamp(yMax, 0, h - 1);

            var result = new YoloV5Result()
            {
                ClassName = _classes[k - 5], // class index = score slot - 5 (0 for single-class "mark")
                Confidence = output[0, i, k],
                Box = new RectangleF(xMin, yMin, xMax - xMin, yMax - yMin)
            };
            lock (gate) results.Add(result);
        }
    });
    return results;
}

// Restricts value to the inclusive range [min, max].
float Clamp(float value, float min, float max)
{
    if (value < min) return min;
    if (value > max) return max;
    return value;
}

完整

/// <summary>
/// Minimal YOLOv5 ONNX demo for .NET 4.6.1: loads "new.onnx", runs it on
/// "locateorigin.jpg", applies NMS and draws the surviving boxes with OpenCV.
/// </summary>
public class Yolov5onnx
{
    public string InputName { get; set; }     // model input node name
    public int ModelInputWidth { get; set; }  // model input width (pixels)
    public int ModelInputHeight { get; set; } // model input height (pixels)

    /// <summary>End-to-end test: load model, infer, post-process, display.</summary>
    public void TestModel()
    {
        // Load the ONNX model.
        string modelPath = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "new.onnx");
        var sessionOptions = new SessionOptions();
        sessionOptions.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL;

        string imagePath = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "locateorigin.jpg");
        var classs = new string[] { "mark" };

        // Dispose session and bitmap deterministically (the original leaked both).
        using (var session = new InferenceSession(modelPath, sessionOptions))
        using (var image = new Bitmap(imagePath))
        {
            GetInputDetails(session);

            // Preprocess into an NCHW [1, 3, H, W] tensor.
            var input = PreprocessImage(image);
            var inputTensor = new DenseTensor<float>(input.ToArray(), new[] { 1, 3, ModelInputHeight, ModelInputWidth });
            var inputs = new List<NamedOnnxValue> { NamedOnnxValue.CreateFromTensor(InputName, inputTensor) };

            List<Detection> detections;
            using (var results = session.Run(inputs))
            {
                // Parse while `results` is alive — the tensor wraps memory
                // owned by the result collection.
                var output = results.First().AsTensor<float>();
                detections = PostprocessDetections(output, image, classs);
            }
            detections = NonMaxSuppression(detections).ToList();

            Mat mat = new Mat(imagePath);
            // Print and draw each surviving detection.
            foreach (var detection in detections)
            {
                Console.WriteLine($"Class: {detection.Class}, Confidence: {detection.Confidence}, Bounds: {detection.Bounds}");
                Cv2.Rectangle(mat, new OpenCvSharp.Rect((int)detection.Bounds.X, (int)detection.Bounds.Y, (int)detection.Bounds.Width, (int)detection.Bounds.Height), Scalar.Red, 2);
                Cv2.PutText(mat, $"Conf: {detection.Confidence:F2}",
                                new OpenCvSharp.Point(detection.Bounds.X, detection.Bounds.Y - 10),
                                HersheyFonts.HersheySimplex, 0.9, Scalar.Red, 2);
            }
            Cv2.NamedWindow("a", WindowFlags.Normal);
            Cv2.ResizeWindow("a", 1500, 800);
            Cv2.ImShow("a", mat);
        }
    }

    // Reads the input node name and spatial size from the session metadata.
    private void GetInputDetails(InferenceSession session)
    {
        IReadOnlyDictionary<string, NodeMetadata> inputMeta = session.InputMetadata;
        InputName = inputMeta.Keys.First();
        // NCHW layout: Dimensions[2] = height, Dimensions[3] = width.
        // (The original assigned them swapped — harmless only for square inputs.)
        var dimensions = inputMeta[InputName].Dimensions;
        (ModelInputWidth, ModelInputHeight) = (dimensions[3], dimensions[2]);
    }

    // Stretch-resizes the image to the model input size and converts it to a
    // normalised NCHW float tensor.
    Tensor<float> PreprocessImage(Bitmap image)
    {
        bool needsResize = image.Width != ModelInputWidth || image.Height != ModelInputHeight;
        Bitmap resized = needsResize
            ? new Bitmap(image, new System.Drawing.Size(ModelInputWidth, ModelInputHeight))
            : image;

        try
        {
            // NCHW tensor: [batch, channel, y, x].
            var input = new DenseTensor<float>(new[] { 1, 3, ModelInputHeight, ModelInputWidth });

            // NOTE: GetPixel is slow; LockBits would be much faster for large images.
            for (int y = 0; y < resized.Height; y++)
            {
                for (int x = 0; x < resized.Width; x++)
                {
                    var color = resized.GetPixel(x, y);
                    // Normalise each channel to [0,1]; the float16 clamp is a
                    // no-op for these values but kept for fp16-exported models.
                    input[0, 0, y, x] = ConvertToFloat16Range(color.R / 255.0f);
                    input[0, 1, y, x] = ConvertToFloat16Range(color.G / 255.0f);
                    input[0, 2, y, x] = ConvertToFloat16Range(color.B / 255.0f);
                }
            }
            return input;
        }
        finally
        {
            // Don't leak the temporary bitmap (the original never disposed it).
            if (needsResize) resized.Dispose();
        }
    }

    // Clamps a float32 into the representable float16 range.
    static float ConvertToFloat16Range(float value)
    {
        const float maxFloat16 = 65504.0f;
        return Math.Max(-maxFloat16, Math.Min(maxFloat16, value));
    }

    // Parses the raw model output [1, numBoxes, 5 + numClasses] into detections
    // in original-image coordinates.
    List<Detection> PostprocessDetections(Tensor<float> output, Image image, string[] classs)
    {
        List<Detection> results = new List<Detection>();
        // List<T>.Add is not thread-safe; the original called it straight from
        // Parallel.For, which can lose or corrupt entries. Guard with a lock.
        object gate = new object();

        float Confidence = 0.20f;    // objectness threshold
        float MulConfidence = 0.35f; // objectness * class-score threshold

        var ModelOutputCount = output.Dimensions[1];      // number of candidate boxes
        var ModelOutputDimensions = output.Dimensions[2]; // values per box (6 => 1 class)

        var (w, h) = (image.Width, image.Height); // original image size
        var (xGain, yGain) = (ModelInputWidth / (float)w, ModelInputHeight / (float)h); // x, y gains
        var (xPad, yPad) = ((ModelInputWidth - w * xGain) / 2, (ModelInputHeight - h * yGain) / 2); // left, top pads

        Parallel.For(0, ModelOutputCount, (i) => // one iteration per candidate box
        {
            if (output[0, i, 4] <= Confidence) return; // drop low-objectness boxes early

            // Multiply class scores by objectness. Plain loops: the original
            // nested Parallel.For over a handful of elements — pure overhead.
            for (int j = 5; j < ModelOutputDimensions; j++)
                output[0, i, j] = output[0, i, j] * output[0, i, 4];

            for (int k = 5; k < ModelOutputDimensions; k++)
            {
                if (output[0, i, k] <= MulConfidence) continue; // weak combined score

                // Map centre/size (model-input pixels) to corner coordinates in
                // the original image: remove padding, then divide by the gain.
                float xMin = ((output[0, i, 0] - output[0, i, 2] / 2) - xPad) / xGain;
                float yMin = ((output[0, i, 1] - output[0, i, 3] / 2) - yPad) / yGain;
                float xMax = ((output[0, i, 0] + output[0, i, 2] / 2) - xPad) / xGain;
                float yMax = ((output[0, i, 1] + output[0, i, 3] / 2) - yPad) / yGain;

                // Clamp into the image (the original mixed "w - 0" and "w - 1").
                xMin = Clamp(xMin, 0, w - 1);
                yMin = Clamp(yMin, 0, h - 1);
                xMax = Clamp(xMax, 0, w - 1);
                yMax = Clamp(yMax, 0, h - 1);

                var det = new Detection()
                {
                    Class = classs[k - 5], // class index = score slot - 5 (0 for single-class "mark")
                    Confidence = output[0, i, k],
                    Bounds = new RectangleF(xMin, yMin, xMax - xMin, yMax - yMin)
                };
                lock (gate) results.Add(det);
            }
        });
        return results;
    }

    // Restricts value to the inclusive range [min, max].
    float Clamp(float value, float min, float max) =>
     (value < min) ? min :
        (value > max) ? max : value;

    // Greedy per-class non-maximum suppression.
    private IEnumerable<Detection> NonMaxSuppression(List<Detection> predictions, float iouThreshold = 0.45f)
    {
        var results = new List<Detection>();

        foreach (var classGroup in predictions.GroupBy(p => p.Class).OrderByDescending(g => g.Key))
        {
            // Sort once per class — the original re-sorted the remaining list on
            // every loop iteration. Where() preserves the descending order.
            var classResults = classGroup.OrderByDescending(r => r.Confidence).ToList();
            while (classResults.Any())
            {
                var result = classResults[0];
                results.Add(result);
                classResults.RemoveAt(0);

                classResults = classResults.Where(r => CalculateIoU(result.Bounds, r.Bounds) < iouThreshold).ToList();
            }
        }

        return results;
    }

    // Intersection-over-union of two boxes; 0 when the union is degenerate.
    private float CalculateIoU(RectangleF box1, RectangleF box2)
    {
        // Compute the intersection once (the original evaluated it twice).
        var intersection = RectangleF.Intersect(box1, box2);
        var intersectionArea = intersection.Width * intersection.Height;
        var unionArea = box1.Width * box1.Height + box2.Width * box2.Height - intersectionArea;
        // Guard against 0/0 -> NaN for degenerate (zero-area) boxes.
        return unionArea <= 0 ? 0 : intersectionArea / unionArea;
    }
}
/// <summary>One detected object in original-image coordinates.</summary>
class Detection
{
    /// <summary>Bounding box in original-image pixel coordinates.</summary>
    public RectangleF Bounds { get; set; }

    /// <summary>Class label of the detection (e.g. "mark").</summary>
    public string Class { get; set; }

    /// <summary>Final confidence score (objectness * class score).</summary>
    public float Confidence { get; set; }
}

优化版本

using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Runtime.InteropServices;
using System.Diagnostics;
using System.IO;

namespace WpfApp1
{
    /// <summary>
    /// ONNX Runtime based YOLO detector. Supports both YOLOv5-style outputs
    /// ([1, boxes, 5+classes]) and YOLOv8/11-style outputs ([1, 4+classes, boxes]).
    /// Input size, architecture and class labels are read from the model metadata.
    /// </summary>
    public class YoloDetector : IDisposable
    {
        private readonly InferenceSession _session;
        private string[] _labels;   // class names parsed from the "names" metadata entry
        private Size _inputSize;    // model input size (width, height)
        private bool _disposed;
        private YoloArchitecture _architecture;

        /// <summary>
        /// Creates the detector and initialises it from the model's metadata.
        /// </summary>
        /// <param name="modelPath">Path to the .onnx model file.</param>
        /// <param name="sessionOptions">Optional session options; a fresh default is built when null.</param>
        /// <exception cref="ArgumentNullException">modelPath is null or empty.</exception>
        /// <exception cref="FileNotFoundException">modelPath does not exist.</exception>
        /// <exception cref="InvalidOperationException">Session creation or metadata parsing failed.</exception>
        public YoloDetector(string modelPath, SessionOptions sessionOptions = null)
        {
            if (string.IsNullOrEmpty(modelPath))
                throw new ArgumentNullException(nameof(modelPath));

            if (!File.Exists(modelPath))
                throw new FileNotFoundException("模型文件未找到!", modelPath);

            try
            {
                // BUG FIX: the original kept ONE static SessionOptions instance and
                // appended an execution provider to it on every construction,
                // mutating shared state and stacking providers across detectors.
                // Build a fresh options object per detector instead.
                sessionOptions ??= CreateDefaultSessionOptions();

                // Prefer CUDA; fall back to CPU when unavailable.
                try
                {
                    sessionOptions.AppendExecutionProvider_CUDA(0);
                    Debug.WriteLine("CUDA 启用成功");
                }
                catch (Exception ex)
                {
                    Debug.WriteLine($"CUDA 初始化失败: {ex.Message}. 使用CPU.");
                    sessionOptions.AppendExecutionProvider_CPU();
                }

                _session = new InferenceSession(modelPath, sessionOptions);

                // Read architecture, input size and labels from the model itself.
                InitializeFromMetadata();
            }
            catch (Exception ex)
            {
                throw new InvalidOperationException("初始化失败!", ex);
            }
        }

        // Builds a fresh default options object — never shared between sessions.
        private static SessionOptions CreateDefaultSessionOptions() => new SessionOptions
        {
            EnableMemoryPattern = true,
            ExecutionMode = ExecutionMode.ORT_PARALLEL,
            GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL
        };

        private void InitializeFromMetadata()
        {
            // YOLOv5 output is [1, boxes, 5+classes]; YOLOv8/11 is [1, 4+classes, boxes].
            // Boxes always vastly outnumber per-box attributes, so compare the two
            // trailing dimensions. (The original "== 6" check only recognised
            // single-class v5 models.)
            var output0 = _session.OutputMetadata["output0"];
            _architecture = output0.Dimensions[1] > output0.Dimensions[2]
                ? YoloArchitecture.YoloV5
                : YoloArchitecture.YoloV8Or11;

            // Input layout is NCHW: Dimensions[2] = height, Dimensions[3] = width.
            // The original assigned them swapped — harmless only for square inputs.
            var inputDims = _session.InputMetadata.First().Value.Dimensions;
            _inputSize = new Size(inputDims[3], inputDims[2]);

            // "names" is stored like {0: 'person', 1: 'car', ...}.
            var metadata = _session.ModelMetadata.CustomMetadataMap;
            if (!metadata.TryGetValue("names", out var names))
                throw new InvalidOperationException("模型元数据缺少'names'字段");

            _labels = names.Trim('{', '}')
                           .Split(',')
                           .Select(name => name.Split(':')[1].Trim('\'', ' ', '"'))
                           .ToArray();
        }

        // Letterboxes the image to the model input size and converts it to a
        // normalised NCHW float tensor.
        private Tensor<float> PreprocessImage(Image image)
        {
            var tensor = new DenseTensor<float>(new[] { 1, 3, _inputSize.Height, _inputSize.Width });

            // BUG FIX: the original copied the resized image into yet another
            // Bitmap and never disposed the intermediate — dispose it here.
            // NOTE: GetPixel is slow; LockBits would be much faster for big inputs.
            using (var bitmap = (Bitmap)ResizeImage(image, _inputSize.Width, _inputSize.Height))
            {
                for (int y = 0; y < bitmap.Height; y++)
                {
                    for (int x = 0; x < bitmap.Width; x++)
                    {
                        var pixel = bitmap.GetPixel(x, y);
                        // Normalise each channel to [0,1].
                        tensor[0, 0, y, x] = pixel.R / 255f;
                        tensor[0, 1, y, x] = pixel.G / 255f;
                        tensor[0, 2, y, x] = pixel.B / 255f;
                    }
                }
            }
            return tensor;
        }

        /// <summary>
        /// Runs detection on <paramref name="image"/> and returns NMS-filtered predictions.
        /// </summary>
        public IList<Prediction> Detect(Image image, float confidenceThreshold = 0.5f, float iouThreshold = 0.5f)
        {
            return _architecture switch
            {
                YoloArchitecture.YoloV5 => DetectV5(image, confidenceThreshold, iouThreshold),
                YoloArchitecture.YoloV8Or11 => DetectV11(image, confidenceThreshold, iouThreshold),
                _ => throw new ArgumentException("不支持的架构")
            };
        }

        // YOLOv5 pipeline: preprocess, run, parse, NMS.
        private IList<Prediction> DetectV5(Image image, float confidenceThreshold = 0.5f, float iouThreshold = 0.5f)
        {
            var inputTensor = PreprocessImage(image);

            var inputs = new List<NamedOnnxValue>
            {
                NamedOnnxValue.CreateFromTensor(_session.InputMetadata.First().Key, inputTensor)
            };

            // Parse inside the using scope: the output tensor wraps memory owned
            // by the result collection.
            using var results = _session.Run(inputs);
            var output = results.FirstOrDefault()?.AsTensor<float>();
            if (output == null) throw new Exception("推理失败");

            var predictions = ParseOutputV5(output, image.Size, confidenceThreshold);
            return ApplyNms(predictions, iouThreshold);
        }

        // Parses YOLOv5 output [1, boxes, 5+classes] into predictions in
        // original-image coordinates.
        private List<Prediction> ParseOutputV5(Tensor<float> output, Size originalSize, float confidenceThreshold)
        {
            var predictions = new ConcurrentBag<Prediction>(); // thread-safe: filled from Parallel.For

            int numClasses = output.Dimensions[2] - 5; // minus (x, y, w, h, objectness)
            int numBoxes = output.Dimensions[1];

            Parallel.For(0, numBoxes, i =>
            {
                float objectness = output[0, i, 4];
                if (objectness < confidenceThreshold) return; // drop low-objectness boxes early

                // Find the best-scoring class for this box.
                float maxClassScore = 0;
                int maxClassIndex = 0;
                for (int j = 0; j < numClasses; j++)
                {
                    float score = output[0, i, 5 + j];
                    if (score > maxClassScore)
                    {
                        maxClassScore = score;
                        maxClassIndex = j;
                    }
                }

                float finalScore = objectness * maxClassScore;
                if (finalScore < confidenceThreshold) return;

                // Box is centre/size in model-input pixels.
                float x = output[0, i, 0];
                float y = output[0, i, 1];
                float w = output[0, i, 2];
                float h = output[0, i, 3];

                // Map back to original-image coordinates (undo the letterbox).
                var rect = ConvertToOriginalSize(
                    new RectangleF(x - w / 2, y - h / 2, w, h),
                    originalSize,
                    _inputSize
                );

                predictions.Add(new Prediction
                {
                    Rectangle = rect,
                    Confidence = finalScore,
                    Label = _labels[maxClassIndex]
                });
            });

            return predictions.ToList();
        }

        // YOLOv8/11 pipeline: preprocess, run, parse, NMS.
        private IList<Prediction> DetectV11(Image image, float confidenceThreshold = 0.5f, float iouThreshold = 0.5f)
        {
            var inputTensor = PreprocessImage(image);

            var inputMetadata = _session.InputMetadata.First();
            var inputs = new List<NamedOnnxValue>
            {
                NamedOnnxValue.CreateFromTensor(inputMetadata.Key, inputTensor)
            };

            // Parse inside the using scope: the output tensor wraps memory owned
            // by the result collection.
            using var results = _session.Run(inputs);
            var output = results.FirstOrDefault()?.AsTensor<float>();
            if (output == null) throw new Exception("推理失败");

            var predictions = ParseOutput(output, image.Size, confidenceThreshold);
            return ApplyNms(predictions, iouThreshold);
        }

        // Parses YOLOv8/11 output [1, 4+classes, boxes] — note the transposed
        // layout relative to v5, and no separate objectness score.
        private List<Prediction> ParseOutput(Tensor<float> output, Size originalSize, float confidenceThreshold)
        {
            var predictions = new ConcurrentBag<Prediction>(); // thread-safe: filled from Parallel.For

            int numClasses = output.Dimensions[1] - 4;  // minus the 4 bbox values
            int numDetections = output.Dimensions[2];   // number of candidate boxes (e.g. 33600)

            Parallel.For(0, numDetections, i =>
            {
                // Find the best-scoring class for this box.
                float maxConfidence = float.MinValue;
                int bestClassId = -1;

                for (int j = 0; j < numClasses; j++)
                {
                    float confidence = output[0, 4 + j, i];
                    if (confidence > maxConfidence)
                    {
                        maxConfidence = confidence;
                        bestClassId = j;
                    }
                }

                if (maxConfidence >= confidenceThreshold)
                {
                    // Box is centre/size (xywh) in model-input pixels.
                    var x = output[0, 0, i];
                    var y = output[0, 1, i];
                    var w = output[0, 2, i];
                    var h = output[0, 3, i];

                    // Map back to original-image coordinates (undo the letterbox).
                    var rect = ConvertToOriginalSize(new RectangleF(x - w / 2, y - h / 2, w, h),
                            originalSize, _inputSize);

                    predictions.Add(new Prediction
                    {
                        Rectangle = rect,
                        Confidence = maxConfidence,
                        Label = _labels[bestClassId]
                    });
                }
            });
            return predictions.ToList();
        }

        // Greedy non-maximum suppression over all classes.
        private List<Prediction> ApplyNms(IEnumerable<Prediction> predictions, float iouThreshold)
        {
            var results = new List<Prediction>();
            var ordered = predictions.OrderByDescending(p => p.Confidence).ToList();

            while (ordered.Count > 0)
            {
                var current = ordered[0];
                results.Add(current);

                ordered.RemoveAt(0);

                // Remove remaining boxes that overlap the kept one too much.
                for (int i = ordered.Count - 1; i >= 0; i--)
                {
                    var iou = CalculateIoU(current.Rectangle, ordered[i].Rectangle);
                    if (iou > iouThreshold) ordered.RemoveAt(i);
                }
            }
            return results;
        }

        // Letterbox resize: scale to fit while keeping aspect ratio, centre on a
        // grey canvas of the exact target size.
        private static Image ResizeImage(Image image, int targetWidth, int targetHeight)
        {
            float scale = Math.Min((float)targetWidth / image.Width, (float)targetHeight / image.Height);

            int newWidth = (int)(image.Width * scale);
            int newHeight = (int)(image.Height * scale);

            var resized = new Bitmap(targetWidth, targetHeight);
            using (var graphics = Graphics.FromImage(resized))
            {
                // Grey padding, matching the YOLO letterbox convention.
                graphics.FillRectangle(Brushes.Gray, 0, 0, targetWidth, targetHeight);

                int x = (targetWidth - newWidth) / 2;
                int y = (targetHeight - newHeight) / 2;

                graphics.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.HighQualityBicubic;
                graphics.DrawImage(image, x, y, newWidth, newHeight);
            }

            return resized;
        }

        // Converts a box from model-input coordinates back to original-image
        // coordinates by removing the letterbox padding and rescaling.
        private static RectangleF ConvertToOriginalSize(RectangleF rect, Size originalSize, Size modelInputSize)
        {
            float scale = Math.Min((float)modelInputSize.Width / originalSize.Width,
                                  (float)modelInputSize.Height / originalSize.Height);

            float padX = (modelInputSize.Width - originalSize.Width * scale) / 2;
            float padY = (modelInputSize.Height - originalSize.Height * scale) / 2;

            return new RectangleF(
                (rect.X - padX) / scale,
                (rect.Y - padY) / scale,
                rect.Width / scale,
                rect.Height / scale
            );
        }

        // Intersection-over-union of two boxes; 0 when there is no overlap or
        // the union is degenerate.
        private static float CalculateIoU(RectangleF a, RectangleF b)
        {
            float intersectLeft = Math.Max(a.Left, b.Left);
            float intersectTop = Math.Max(a.Top, b.Top);
            float intersectRight = Math.Min(a.Right, b.Right);
            float intersectBottom = Math.Min(a.Bottom, b.Bottom);

            if (intersectRight < intersectLeft || intersectBottom < intersectTop)
                return 0;

            float intersectionArea = (intersectRight - intersectLeft) * (intersectBottom - intersectTop);

            float areaA = a.Width * a.Height;
            float areaB = b.Width * b.Height;

            float unionArea = areaA + areaB - intersectionArea;

            // Guard against 0/0 -> NaN for degenerate (zero-area) boxes.
            return unionArea <= 0 ? 0 : intersectionArea / unionArea;
        }

        /// <summary>Disposes the underlying inference session.</summary>
        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        protected virtual void Dispose(bool disposing)
        {
            if (!_disposed)
            {
                if (disposing)
                {
                    _session?.Dispose();
                }
                _disposed = true;
            }
        }

        ~YoloDetector()
        {
            Dispose(false);
        }
    }

    /// <summary>A single detection produced by <see cref="YoloDetector"/>.</summary>
    public class Prediction
    {
        /// <summary>Bounding box in original-image pixel coordinates.</summary>
        public RectangleF Rectangle { get; set; }

        /// <summary>Final confidence score of this detection.</summary>
        public float Confidence { get; set; }

        /// <summary>Class label resolved from the model's metadata.</summary>
        public string Label { get; set; }
    }

    /// <summary>Output layout family of the loaded YOLO model.</summary>
    public enum YoloArchitecture
    {
        /// <summary>Output shaped [1, boxes, 5 + classes]: xywh, objectness, class scores.</summary>
        YoloV5,
        /// <summary>Output shaped [1, 4 + classes, boxes]: xywh then class scores, no objectness.</summary>
        YoloV8Or11
    }
}