ML.NET机器学习入门

SunnyFan · 大约 14 分钟 · 约 4327 字

ML.NET机器学习入门

简介

ML.NET 是微软推出的跨平台开源机器学习框架，让 .NET 开发者无需学习 Python 或 R，就能在 C# 中完成机器学习任务。它支持分类、回归、聚类、推荐、异常检测等常见场景，并且可以集成 ONNX 模型和 TensorFlow 模型，实现深度学习推理。ML.NET 通过 AutoML 还能自动选择最佳算法和超参数，大幅降低机器学习的入门门槛。

特点

1.跨平台 — 支持 Windows、Linux、macOS
2.C# 原生 — 无需 Python，完全在 .NET 生态中开发
3.可扩展 — 支持 ONNX、TensorFlow 等外部模型
4.AutoML — 自动选择算法和调参
5.高性能 — 底层使用 C++ 实现，推理速度快

环境准备

安装 NuGet 包

Install-Package Microsoft.ML
Install-Package Microsoft.ML.AutoML
Install-Package Microsoft.ML.OnnxTransformer

基础概念

概念	说明
IDataView	ML.NET 的核心数据结构，类似数据库表
Estimator	数据转换或训练器的定义（"配方"）
Transformer	训练或转换后的模型（"结果"）
PredictionEngine	单条数据预测引擎
DataViewSchema	数据的列定义和类型信息

数据加载

从文件加载数据

/// <summary>
/// ML.NET 数据加载示例
/// </summary>
using Microsoft.ML;
using Microsoft.ML.Data;

// Create the MLContext, the entry point for every ML.NET operation.
// A fixed seed is passed so repeated runs use the same random sequence.
var mlContext = new MLContext(seed: 42);

// ===== Option 1: load from a CSV file =====
// The first line of the file is treated as a header and columns are split on ','.
var dataView = mlContext.Data.LoadFromTextFile<SentimentData>(
    path: "sentiment-data.csv",
    hasHeader: true,
    separatorChar: ',');

// ===== Option 2: load from an in-memory IEnumerable =====
var sampleData = new List<SentimentData>
{
    new SentimentData { SentimentText = "这部电影非常好看", Label = true },
    new SentimentData { SentimentText = "质量太差了", Label = false },
    new SentimentData { SentimentText = "服务态度很好，下次还来", Label = true },
    new SentimentData { SentimentText = "完全不值这个价格", Label = false },
};
var dataFromEnumerable = mlContext.Data.LoadFromEnumerable(sampleData);

// ===== Option 3: load from a database =====
// DatabaseSource expects a DbProviderFactory instance (not a provider-name string);
// `connectionString` must be supplied by the caller.
// var dataFromDb = mlContext.Data.CreateDatabaseLoader<SentimentData>()
//     .Load(new DatabaseSource(System.Data.SqlClient.SqlClientFactory.Instance, connectionString,
//         "SELECT SentimentText, Label FROM SentimentData"));

数据模型定义

/// <summary>
/// Sentiment analysis input row: one piece of text plus its boolean label.
/// </summary>
public class SentimentData
{
    // Loaded from column index 0 of the source file.
    [LoadColumn(0)]
    public string SentimentText { get; set; }

    // Loaded from column index 1 and exposed to the pipeline under the column name "Label".
    [LoadColumn(1), ColumnName("Label")]
    public bool Label { get; set; }  // true = positive, false = negative
}

/// <summary>
/// Sentiment analysis prediction output.
/// </summary>
/// <remarks>
/// ML.NET binary classification trainers emit the columns <c>PredictedLabel</c> (bool),
/// <c>Score</c> (float) and, for calibrated trainers, <c>Probability</c> (float).
/// There is no <c>Probabilities</c> vector column, so binding a <c>float[]</c> to that name
/// makes prediction-engine creation fail. The properties below bind to the real columns.
/// NOTE(review): uncalibrated trainers do not produce <c>Probability</c>; confirm the trainer
/// in use is calibrated before relying on it.
/// </remarks>
public class SentimentPrediction : PredictionData
{
    /// <summary>Predicted class: true = positive, false = negative.</summary>
    [ColumnName("PredictedLabel")]
    public bool Prediction { get; set; }

    /// <summary>Calibrated probability that the text is positive.</summary>
    [ColumnName("Probability")]
    public float Probability { get; set; }

    /// <summary>Raw (uncalibrated) score produced by the model.</summary>
    [ColumnName("Score")]
    public float Score { get; set; }

    /// <summary>
    /// Two-element view kept for callers that read <c>Scores[1]</c> as the positive probability:
    /// index 0 is the negative-class probability, index 1 the positive-class probability.
    /// Marked <c>[NoColumn]</c> so ML.NET never tries to bind it to a data column.
    /// </summary>
    [NoColumn]
    public float[] Scores => new[] { 1f - Probability, Probability };
}

/// <summary>
/// House price prediction input row (regression task).
/// Columns are loaded by position; <see cref="Price"/> is exposed as the "Label" column.
/// </summary>
public class HousingData
{
    [LoadColumn(0)]
    public float Size { get; set; }         // floor area (square metres)

    [LoadColumn(1)]
    public float Bedrooms { get; set; }     // number of bedrooms

    [LoadColumn(2)]
    public float Bathrooms { get; set; }    // number of bathrooms

    [LoadColumn(3)]
    public float YearBuilt { get; set; }    // year of construction

    [LoadColumn(4)]
    public float LocationScore { get; set; } // location rating, 1-10

    [LoadColumn(5)]
    [ColumnName("Label")]
    public float Price { get; set; }        // price (in units of 10,000 yuan)
}

/// <summary>
/// House price prediction output: the value read from the model's "Score" column.
/// </summary>
public class HousingPrediction
{
    [ColumnName("Score")]
    public float PredictedPrice { get; set; }
}

/// <summary>
/// Empty base class for prediction result types; it declares no members of its own.
/// </summary>
public class PredictionData { }

数据转换

文本特征化处理

/// <summary>
/// Builds a text-processing pipeline that turns the "SentimentText" column into numeric features.
/// </summary>
/// <param name="mlContext">The ML.NET context used to create every transform.</param>
/// <returns>
/// An estimator chain producing "Features" (pretrained word embeddings) and
/// "TfIdfFeatures" (TF-IDF weighted bag of words).
/// </returns>
/// <remarks>
/// Fixes relative to the earlier version of this snippet:
/// <list type="bullet">
/// <item>ML.NET ships no built-in Chinese stop-word list, so a custom list is passed to
/// <c>RemoveStopWords</c> instead of a non-existent language enum member.</item>
/// <item>TF-IDF features come from <c>ProduceWordBags</c> with TF-IDF weighting
/// (there is no <c>ProduceWordEmbeddings</c> transform).</item>
/// <item><c>NormalizeText</c> keeps punctuation by default; it is now told to drop it.</item>
/// </list>
/// NOTE(review): <c>TokenizeIntoWords</c> splits on spaces by default, so unsegmented Chinese
/// text stays a single token; segment the text beforehand if word-level features are required.
/// NOTE(review): <c>MyInput</c> also declares City/HouseType; confirm the input data exposes
/// those columns, otherwise the custom mapping may fail to bind.
/// </remarks>
public static IEstimator<ITransformer> BuildTextPipeline(MLContext mlContext)
{
    // Small illustrative stop-word list; replace with a full list for real workloads.
    string[] chineseStopWords = { "的", "了", "是", "在", "和", "也", "就", "都" };

    var pipeline = mlContext.Transforms
        // 1. Custom mapping: adds the computed TextLength column.
        .CustomMapping<MyInput, MyOutput>(CustomMappingAction, contractName: null)

        // 2. Text normalization: lower-case (default) and strip punctuation.
        .Append(mlContext.Transforms.Text.NormalizeText(
            "NormalizedText", "SentimentText", keepPunctuations: false))

        // 3. Tokenization: split the text into a vector of words.
        .Append(mlContext.Transforms.Text.TokenizeIntoWords("Words", "NormalizedText"))

        // 4. Stop-word removal using the custom list above.
        .Append(mlContext.Transforms.Text.RemoveStopWords(
            "WordsNoStopWords", "Words", chineseStopWords))

        // 5. Word embeddings: map each word to a pretrained numeric vector.
        //    NOTE(review): the bundled pretrained models are presumably trained on English text; verify coverage.
        .Append(mlContext.Transforms.Text.ApplyWordEmbedding(
            "Features", "WordsNoStopWords",
            Microsoft.ML.Transforms.Text.WordEmbeddingEstimator.PretrainedModelKind.SentimentSpecificWordEmbedding))

        // 6. TF-IDF feature extraction via a weighted bag of words.
        .Append(mlContext.Transforms.Text.ProduceWordBags(
            outputColumnName: "TfIdfFeatures",
            inputColumnName: "WordsNoStopWords",
            weighting: Microsoft.ML.Transforms.Text.NgramExtractingEstimator.WeightingCriteria.TfIdf));

    return pipeline;
}

/// <summary>
/// Custom-mapping callback: stores the character count of the input text in
/// <c>output.TextLength</c>, using 0 when the text is null.
/// </summary>
private static void CustomMappingAction(MyInput input, MyOutput output)
{
    var text = input.SentimentText;
    if (text == null)
    {
        output.TextLength = 0;
        return;
    }

    output.TextLength = text.Length;
}

数值特征处理

/// <summary>
/// Builds the numeric preprocessing pipeline for <see cref="HousingData"/>:
/// concatenate the numeric columns into "Features", min-max normalize them,
/// then replace missing values with the column mean.
/// </summary>
/// <param name="mlContext">The ML.NET context used to create the transforms.</param>
/// <returns>The three-step estimator chain, in the order listed above.</returns>
public static IEstimator<ITransformer> BuildNumericPipeline(MLContext mlContext)
{
    // Source columns merged into the single "Features" vector.
    string[] featureColumns =
    {
        nameof(HousingData.Size),
        nameof(HousingData.Bedrooms),
        nameof(HousingData.Bathrooms),
        nameof(HousingData.YearBuilt),
        nameof(HousingData.LocationScore),
    };

    var transforms = mlContext.Transforms;

    var concatenate = transforms.Concatenate("Features", featureColumns);
    var normalize = transforms.NormalizeMinMax("Features");
    var fillMissing = transforms.ReplaceMissingValues(
        "Features", replacementMode: MissingValueReplacingEstimator.ReplacementMode.Mean);

    return concatenate
        .Append(normalize)
        .Append(fillMissing);
}

分类编码处理

/// <summary>
/// Builds a pipeline that one-hot encodes the categorical columns "City" and "HouseType"
/// and concatenates them with the numeric Size and Bedrooms columns into "Features".
/// </summary>
/// <param name="mlContext">The ML.NET context used to create the transforms.</param>
/// <returns>The estimator chain: two one-hot encoders followed by a concatenation.</returns>
/// <remarks>
/// Estimators expose neither a <c>Categorical</c> catalog nor a <c>Concatenate</c> method;
/// every step after the first must be created from <c>mlContext.Transforms</c> and chained
/// with <c>Append</c>.
/// </remarks>
public static IEstimator<ITransformer> BuildCategoricalPipeline(MLContext mlContext)
{
    var pipeline = mlContext.Transforms.Categorical
        // One-hot encoding: turn each categorical value into an indicator vector.
        .OneHotEncoding("CityEncoded", "City")
        .Append(mlContext.Transforms.Categorical.OneHotEncoding("TypeEncoded", "HouseType"))

        // Concatenate the encoded and numeric columns into one feature vector.
        .Append(mlContext.Transforms.Concatenate("Features",
            "CityEncoded",
            "TypeEncoded",
            nameof(HousingData.Size),
            nameof(HousingData.Bedrooms)));

    return pipeline;
}

/// <summary>
/// Input type for the custom mapping; holds the text and two categorical string values.
/// </summary>
public class MyInput
{
    public string SentimentText { get; set; }
    public string City { get; set; }
    public string HouseType { get; set; }
}

/// <summary>
/// Output type for the custom mapping; carries the computed text length.
/// </summary>
public class MyOutput
{
    public int TextLength { get; set; }
}

模型训练

二分类 — 情感分析

/// <summary>
/// Trains, evaluates, persists and serves a binary sentiment classifier.
/// </summary>
/// <remarks>
/// Call <see cref="Train"/> or <see cref="LoadModel"/> before <see cref="Predict"/> or
/// <see cref="SaveModel"/>; otherwise an <see cref="InvalidOperationException"/> is thrown
/// instead of an opaque <see cref="NullReferenceException"/>.
/// ML.NET documents <c>PredictionEngine</c> as not thread-safe, so one instance of this
/// class should not be shared across threads.
/// </remarks>
public class SentimentTrainer
{
    private readonly MLContext _mlContext;
    private ITransformer _trainedModel;
    private DataViewSchema _inputSchema;
    private PredictionEngine<SentimentData, SentimentPrediction> _predictionEngine;

    /// <summary>Creates a trainer with a fixed random seed (42).</summary>
    public SentimentTrainer()
    {
        _mlContext = new MLContext(seed: 42);
    }

    /// <summary>
    /// Trains the sentiment model on 80% of the file, evaluates it on the remaining 20%,
    /// prints the metrics, and prepares the prediction engine.
    /// </summary>
    /// <param name="dataPath">Path of the text file to load (first line is a header).</param>
    /// <exception cref="ArgumentException"><paramref name="dataPath"/> is null or blank.</exception>
    public void Train(string dataPath)
    {
        if (string.IsNullOrWhiteSpace(dataPath))
        {
            throw new ArgumentException("A data file path is required.", nameof(dataPath));
        }

        // 1. Load the data.
        // NOTE(review): no separator is passed, so the loader's default is used — confirm it matches the file.
        var dataView = _mlContext.Data.LoadFromTextFile<SentimentData>(dataPath, hasHeader: true);

        // 2. Split into training and test sets (80/20).
        var dataSplit = _mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);

        // 3. Build the training pipeline: featurize the text, then SDCA logistic regression.
        var pipeline = _mlContext.Transforms.Text
            .FeaturizeText("Features", nameof(SentimentData.SentimentText))
            .Append(_mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
                labelColumnName: "Label",
                featureColumnName: "Features",
                maximumNumberOfIterations: 100));

        // 4. Train the model.
        Console.WriteLine("开始训练...");
        _trainedModel = pipeline.Fit(dataSplit.TrainSet);
        _inputSchema = dataView.Schema; // kept so SaveModel can persist it alongside the model
        Console.WriteLine("训练完成");

        // 5. Evaluate on the held-out test set.
        var predictions = _trainedModel.Transform(dataSplit.TestSet);
        var metrics = _mlContext.BinaryClassification.Evaluate(predictions);

        Console.WriteLine($"准确率 (Accuracy):       {metrics.Accuracy:P2}");
        Console.WriteLine($"AUC:                     {metrics.AreaUnderRocCurve:P2}");
        Console.WriteLine($"F1分数 (F1Score):        {metrics.F1Score:P2}");
        Console.WriteLine($"正例精确率 (PosPrecision): {metrics.PositivePrecision:P2}");
        Console.WriteLine($"正例召回率 (PosRecall):    {metrics.PositiveRecall:P2}");

        // 6. Create the prediction engine for single-row predictions.
        _predictionEngine = _mlContext.Model.CreatePredictionEngine<SentimentData, SentimentPrediction>(_trainedModel);
    }

    /// <summary>Predicts the sentiment of a single piece of text.</summary>
    /// <param name="text">The text to classify.</param>
    /// <returns>The prediction produced by the current model.</returns>
    /// <exception cref="InvalidOperationException">No model has been trained or loaded.</exception>
    public SentimentPrediction Predict(string text)
    {
        EnsureModelReady();

        var input = new SentimentData { SentimentText = text };
        return _predictionEngine.Predict(input);
    }

    /// <summary>Saves the current model, together with its input schema when known.</summary>
    /// <param name="modelPath">Destination file path.</param>
    /// <exception cref="InvalidOperationException">No model has been trained or loaded.</exception>
    public void SaveModel(string modelPath)
    {
        EnsureModelReady();

        _mlContext.Model.Save(_trainedModel, _inputSchema, modelPath);
    }

    /// <summary>Loads a previously saved model and rebuilds the prediction engine.</summary>
    /// <param name="modelPath">Path of the saved model file.</param>
    public void LoadModel(string modelPath)
    {
        _trainedModel = _mlContext.Model.Load(modelPath, out _inputSchema);
        _predictionEngine = _mlContext.Model.CreatePredictionEngine<SentimentData, SentimentPrediction>(_trainedModel);
    }

    // Fails fast with a descriptive message when no model is available yet.
    private void EnsureModelReady()
    {
        if (_trainedModel == null || _predictionEngine == null)
        {
            throw new InvalidOperationException("No model is available. Call Train or LoadModel first.");
        }
    }
}

回归 — 房价预测

/// <summary>
/// Regression training for house prices: fits several algorithms on the same
/// preprocessing pipeline and keeps the one with the highest R-squared.
/// </summary>
public class HousePriceTrainer
{
    private readonly MLContext _mlContext = new(seed: 42);
    private ITransformer _model;

    /// <summary>
    /// Trains each candidate regression algorithm on 80% of the data, evaluates it on the
    /// remaining 20%, prints one metrics line per algorithm, and retains the best model.
    /// </summary>
    /// <param name="dataPath">Path of the text file to load (first line is a header).</param>
    /// <returns>The test-set metrics of the algorithm with the highest R-squared.</returns>
    public RegressionMetrics TrainAndCompare(string dataPath)
    {
        var dataView = _mlContext.Data.LoadFromTextFile<HousingData>(dataPath, hasHeader: true);
        var split = _mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);

        // Algorithm-independent preprocessing: concatenate the features, then min-max normalize.
        // The field _mlContext is used throughout; this class declares no `mlContext` symbol.
        var preprocessor = _mlContext.Transforms
            .Concatenate("Features",
                nameof(HousingData.Size),
                nameof(HousingData.Bedrooms),
                nameof(HousingData.Bathrooms),
                nameof(HousingData.YearBuilt),
                nameof(HousingData.LocationScore))
            .Append(_mlContext.Transforms.NormalizeMinMax("Features"));

        // Candidate regression algorithms, keyed by display name.
        var trainers = new Dictionary<string, IEstimator<ITransformer>>
        {
            ["SDCA"] = _mlContext.Regression.Trainers.Sdca(maximumNumberOfIterations: 100),
            ["L-BFGS"] = _mlContext.Regression.Trainers.LbfgsPoissonRegression(),
            ["FastTree"] = _mlContext.Regression.Trainers.FastTree(numberOfLeaves: 20, numberOfTrees: 100),
            ["FastForest"] = _mlContext.Regression.Trainers.FastForest(numberOfLeaves: 20, numberOfTrees: 100),
            ["LightGBM"] = _mlContext.Regression.Trainers.LightGbm(numberOfLeaves: 20, numberOfIterations: 100),
        };

        RegressionMetrics bestMetrics = null;
        string bestTrainer = "";
        double bestR2 = double.MinValue;

        foreach (var (name, trainer) in trainers)
        {
            var pipeline = preprocessor.Append(trainer);
            var model = pipeline.Fit(split.TrainSet);
            var predictions = model.Transform(split.TestSet);
            var metrics = _mlContext.Regression.Evaluate(predictions);

            Console.WriteLine($"{name,-12} | R2={metrics.RSquared:F4} | RMSE={metrics.RootMeanSquaredError:F2} | MAE={metrics.MeanAbsoluteError:F2}");

            // Keep the model with the strictly highest R-squared seen so far.
            if (metrics.RSquared > bestR2)
            {
                bestR2 = metrics.RSquared;
                bestMetrics = metrics;
                bestTrainer = name;
                _model = model;
            }
        }

        Console.WriteLine($"\n最佳算法：{bestTrainer}，R2={bestR2:F4}");
        return bestMetrics;
    }
}

聚类 — 客户分群

/// <summary>
/// Customer segmentation with K-Means: groups customers by their purchasing behaviour.
/// </summary>
public class CustomerClustering
{
    private readonly MLContext _mlContext = new(seed: 42);

    /// <summary>
    /// Customer input row.
    /// </summary>
    public class CustomerData
    {
        public float Recency { get; set; }       // days since the most recent purchase
        public float Frequency { get; set; }     // number of purchases
        public float Monetary { get; set; }       // total amount spent
        public float AvgOrderValue { get; set; }  // average order value
    }

    /// <summary>
    /// Clustering output: the assigned cluster and the values of the "Score" column.
    /// </summary>
    public class ClusterPrediction
    {
        [ColumnName("PredictedLabel")]
        public uint PredictedClusterId { get; set; }

        [ColumnName("Score")]
        public float[] Distances { get; set; }  // distance to each cluster centroid
    }

    /// <summary>
    /// Trains a K-Means model, predicts the cluster of one hard-coded sample customer,
    /// and prints clustering quality metrics computed on the training data.
    /// </summary>
    /// <param name="customers">The customers to cluster.</param>
    /// <param name="k">Number of clusters; must be positive.</param>
    /// <exception cref="ArgumentNullException"><paramref name="customers"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="k"/> is not positive.</exception>
    public void Train(IEnumerable<CustomerData> customers, int k = 4)
    {
        if (customers == null)
        {
            throw new ArgumentNullException(nameof(customers));
        }

        if (k <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(k), k, "The number of clusters must be positive.");
        }

        var dataView = _mlContext.Data.LoadFromEnumerable(customers);

        var pipeline = _mlContext.Transforms
            .Concatenate("Features",
                nameof(CustomerData.Recency),
                nameof(CustomerData.Frequency),
                nameof(CustomerData.Monetary),
                nameof(CustomerData.AvgOrderValue))
            .Append(_mlContext.Transforms.NormalizeMinMax("Features"))
            .Append(_mlContext.Clustering.Trainers.KMeans(
                featureColumnName: "Features",
                numberOfClusters: k));

        var model = pipeline.Fit(dataView);

        // Predict the cluster of a sample customer.
        var predictor = _mlContext.Model.CreatePredictionEngine<CustomerData, ClusterPrediction>(model);
        var sample = new CustomerData { Recency = 5, Frequency = 20, Monetary = 5000, AvgOrderValue = 250 };
        var prediction = predictor.Predict(sample);

        Console.WriteLine($"客户被分到第 {prediction.PredictedClusterId} 组");
        Console.WriteLine($"到各组的距离：{string.Join(", ", prediction.Distances.Select(d => d.ToString("F2")))}");

        // Evaluate clustering quality. ClusteringMetrics exposes AverageDistance and
        // DaviesBouldinIndex (no silhouette coefficient). The feature column is passed
        // explicitly because the Davies-Bouldin index is computed from the feature vectors.
        var predictions = model.Transform(dataView);
        var metrics = _mlContext.Clustering.Evaluate(predictions, featureColumnName: "Features");
        Console.WriteLine($"平均距离：{metrics.AverageDistance:F4}"); // lower is better
        Console.WriteLine($"Davies-Bouldin 指数：{metrics.DaviesBouldinIndex:F4}"); // lower is better
    }
}

模型评估

评估指标说明

任务类型	评估指标	说明	理想值
二分类	Accuracy	准确率	越接近1越好
二分类	AUC	ROC曲线下面积	>0.8较好
二分类	F1Score	精确率和召回率的调和平均	越接近1越好
回归	R-Squared (R2)	决定系数	越接近1越好
回归	RMSE	均方根误差	越小越好
回归	MAE	平均绝对误差	越小越好
聚类	AverageDistance / DaviesBouldinIndex	平均距离 / 戴维斯-堡丁指数（ML.NET 的 ClusteringMetrics 不提供轮廓系数）	越小越好
多分类	MicroAccuracy	微平均准确率	越接近1越好
多分类	MacroAccuracy	宏平均准确率	越接近1越好

交叉验证

/// <summary>
/// Evaluates model stability with 5-fold cross-validation on the housing data and prints
/// the average R2, RMSE and MAE plus the standard deviation of R2 across folds.
/// </summary>
/// <param name="dataPath">Path of the text file to load (first line is a header).</param>
public void CrossValidate(string dataPath)
{
    var mlContext = new MLContext(seed: 42);
    var dataView = mlContext.Data.LoadFromTextFile<HousingData>(dataPath, hasHeader: true);

    var pipeline = mlContext.Transforms
        .Concatenate("Features",
            nameof(HousingData.Size),
            nameof(HousingData.Bedrooms),
            nameof(HousingData.Bathrooms),
            nameof(HousingData.YearBuilt),
            nameof(HousingData.LocationScore))
        .Append(mlContext.Transforms.NormalizeMinMax("Features"))
        .Append(mlContext.Regression.Trainers.FastTree(numberOfTrees: 100));

    // 5-fold cross-validation.
    var cvResults = mlContext.Regression.CrossValidate(dataView, pipeline, numberOfFolds: 5);

    // Materialize the per-fold R2 values once and compute their mean once; the mean is
    // reused for both the report and the standard deviation instead of being recomputed
    // for every element.
    var r2Values = cvResults.Select(r => r.Metrics.RSquared).ToArray();
    var r2Mean = r2Values.Average();

    Console.WriteLine("交叉验证结果：");
    Console.WriteLine($"平均 R2 = {r2Mean:F4}");
    Console.WriteLine($"平均 RMSE = {cvResults.Average(r => r.Metrics.RootMeanSquaredError):F2}");
    Console.WriteLine($"平均 MAE = {cvResults.Average(r => r.Metrics.MeanAbsoluteError):F2}");

    // Population standard deviation of R2 across folds; smaller means a more stable model.
    var r2Std = Math.Sqrt(r2Values.Average(v => Math.Pow(v - r2Mean, 2)));
    Console.WriteLine($"R2 标准差 = {r2Std:F4}");
}

AutoML 自动机器学习

/// <summary>
/// AutoML: automatically searches for the best algorithm and hyperparameters.
/// Useful for quickly validating how well a problem can be modelled without manual tuning.
/// </summary>
public class AutoMlExperiment
{
    /// <summary>
    /// Runs a 60-second binary classification AutoML experiment on the sentiment data,
    /// prints the best run and the top five runs by accuracy, and makes one sample prediction.
    /// </summary>
    /// <param name="dataPath">Path of the text file to load (first line is a header).</param>
    /// <exception cref="InvalidOperationException">The experiment produced no usable run.</exception>
    /// <remarks>
    /// <c>BinaryClassificationExperiment</c> is run through <c>Execute</c>; the result exposes
    /// <c>RunDetails</c>, <c>BestRun</c> and per-run <c>ValidationMetrics</c>. Runs carry no
    /// index of their own, so the position within <c>RunDetails</c> is used as the run number.
    /// </remarks>
    public void RunBinaryClassification(string dataPath)
    {
        var mlContext = new MLContext(seed: 42);

        // Load the data and hold out 20% for validation.
        var dataView = mlContext.Data.LoadFromTextFile<SentimentData>(dataPath, hasHeader: true);
        var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);

        // Configure the AutoML experiment.
        var experiment = mlContext.Auto()
            .CreateBinaryClassificationExperiment(maxExperimentTimeInSeconds: 60);

        // Run it with explicit training and validation sets.
        var result = experiment.Execute(split.TrainSet, split.TestSet, labelColumnName: "Label");

        var runs = result.RunDetails.ToList();
        var bestRun = result.BestRun;
        if (bestRun == null || bestRun.ValidationMetrics == null)
        {
            throw new InvalidOperationException("The AutoML experiment did not produce a successful run.");
        }

        // Report the best run.
        Console.WriteLine("AutoML 实验结果：");
        Console.WriteLine($"最佳模型编号：{runs.IndexOf(bestRun)}");
        Console.WriteLine($"最佳算法：{bestRun.TrainerName}");
        Console.WriteLine($"最佳准确率：{bestRun.ValidationMetrics.Accuracy:P2}");

        // Report the top five runs; runs without metrics (failed runs) are skipped.
        var topRuns = runs
            .Select((run, index) => (Run: run, Index: index))
            .Where(entry => entry.Run.ValidationMetrics != null)
            .OrderByDescending(entry => entry.Run.ValidationMetrics.Accuracy)
            .Take(5);

        foreach (var (run, index) in topRuns)
        {
            Console.WriteLine($"  #{index} {run.TrainerName,-30} Accuracy={run.ValidationMetrics.Accuracy:P2}");
        }

        // Predict with the best model.
        // NOTE(review): the output type must match the columns the selected trainer emits — confirm.
        var bestModel = bestRun.Model;
        var predictor = mlContext.Model.CreatePredictionEngine<SentimentData, SentimentPrediction>(bestModel);

        var testInput = new SentimentData { SentimentText = "这个产品非常好用" };
        var prediction = predictor.Predict(testInput);
        Console.WriteLine($"\n预测结果：{(prediction.Prediction ? "正面" : "负面")}，概率={prediction.Probability:P2}");
    }
}

ASP.NET Core 集成

模型服务注册

// Integrating an ML.NET model into ASP.NET Core through dependency injection.
//
// ===== Program.cs =====
// The model loader is registered once for the whole application (singleton),
// while the prediction service that consumes it is created per request scope.
builder.Services.AddSingleton<MlModelLoader>();
builder.Services.AddScoped<IMlPredictionService, SentimentPredictionService>();

// ===== Model loader =====
/// <summary>
/// Loads the sentiment model from disk and serves predictions through a single
/// <c>PredictionEngine</c>, serializing every access with a lock.
/// </summary>
/// <remarks>
/// The model path comes from the "ML:ModelPath" configuration key and falls back to
/// "Models/sentiment_model.zip". The model is loaded eagerly in the constructor.
/// </remarks>
public class MlModelLoader
{
    private readonly string _modelPath;
    private readonly MLContext _mlContext;
    private ITransformer _model;
    private PredictionEngine<SentimentData, SentimentPrediction> _predictionEngine;
    private readonly object _lock = new();

    /// <summary>Reads the model path from configuration and loads the model.</summary>
    /// <param name="configuration">Application configuration.</param>
    /// <exception cref="ArgumentNullException"><paramref name="configuration"/> is null.</exception>
    public MlModelLoader(IConfiguration configuration)
    {
        if (configuration == null)
        {
            throw new ArgumentNullException(nameof(configuration));
        }

        _modelPath = configuration["ML:ModelPath"] ?? "Models/sentiment_model.zip";
        _mlContext = new MLContext(seed: 42);
        LoadModel();
    }

    /// <summary>
    /// Loads (or reloads) the trained model from the configured path.
    /// </summary>
    /// <remarks>
    /// The new model and engine are fully built before the fields are replaced, so a failed
    /// reload leaves the previous model in service. The replaced engine is disposed to
    /// release the resources it holds.
    /// </remarks>
    public void LoadModel()
    {
        lock (_lock)
        {
            var model = _mlContext.Model.Load(_modelPath, out _);
            var engine = _mlContext.Model.CreatePredictionEngine<SentimentData, SentimentPrediction>(model);

            var previousEngine = _predictionEngine;
            _model = model;
            _predictionEngine = engine;

            previousEngine?.Dispose();
        }
    }

    /// <summary>
    /// Runs one prediction while holding the lock, because the underlying
    /// prediction engine must not be used concurrently.
    /// </summary>
    /// <param name="input">The row to score.</param>
    /// <returns>The prediction for <paramref name="input"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    public SentimentPrediction Predict(SentimentData input)
    {
        if (input == null)
        {
            throw new ArgumentNullException(nameof(input));
        }

        lock (_lock)
        {
            return _predictionEngine.Predict(input);
        }
    }
}

// ===== Prediction service interface =====
/// <summary>
/// Contract for sentiment prediction: one text at a time or a list of texts.
/// </summary>
public interface IMlPredictionService
{
    SentimentResult PredictSentiment(string text);
    List<SentimentResult> PredictBatch(List<string> texts);
}

/// <summary>
/// Result returned to API callers: the analysed text, the predicted polarity and a probability value.
/// </summary>
public class SentimentResult
{
    public string Text { get; set; }
    public bool IsPositive { get; set; }
    public double Probability { get; set; }
}

// ===== Prediction service implementation =====
/// <summary>
/// Default <see cref="IMlPredictionService"/>: delegates scoring to <see cref="MlModelLoader"/>
/// and converts the raw prediction into a <see cref="SentimentResult"/>.
/// </summary>
public class SentimentPredictionService : IMlPredictionService
{
    private readonly MlModelLoader _modelLoader;
    private readonly ILogger<SentimentPredictionService> _logger;

    /// <summary>Stores the injected model loader and logger.</summary>
    public SentimentPredictionService(MlModelLoader modelLoader, ILogger<SentimentPredictionService> logger)
        => (_modelLoader, _logger) = (modelLoader, logger);

    /// <summary>
    /// Scores one text. Any failure is logged with the offending text and rethrown unchanged.
    /// </summary>
    /// <param name="text">The text to classify.</param>
    /// <returns>The text, its predicted polarity and the probability rounded to 4 decimals.</returns>
    public SentimentResult PredictSentiment(string text)
    {
        try
        {
            var input = new SentimentData { SentimentText = text };
            var scored = _modelLoader.Predict(input);

            var outcome = new SentimentResult();
            outcome.Text = text;
            outcome.IsPositive = scored.Prediction;
            outcome.Probability = Math.Round(scored.Probability, 4);
            return outcome;
        }
        catch (Exception error)
        {
            _logger.LogError(error, "预测失败：{Text}", text);
            throw;
        }
    }

    /// <summary>Scores every text in order and returns the results as a new list.</summary>
    /// <param name="texts">The texts to classify.</param>
    public List<SentimentResult> PredictBatch(List<string> texts)
    {
        var results = from text in texts
                      select PredictSentiment(text);
        return results.ToList();
    }
}

API Controller

/// <summary>
/// HTTP API for sentiment analysis, exposing single and batch prediction endpoints.
/// </summary>
[ApiController]
[Route("api/[controller]")]
public class SentimentController : ControllerBase
{
    private readonly IMlPredictionService _predictionService;

    /// <summary>Stores the injected prediction service.</summary>
    public SentimentController(IMlPredictionService predictionService)
        => _predictionService = predictionService;

    /// <summary>
    /// Analyses a single text. Responds 400 when the text is null, empty or whitespace.
    /// </summary>
    [HttpPost("predict")]
    public ActionResult<SentimentResult> Predict([FromBody] PredictRequest request)
    {
        var text = request.Text;
        if (!string.IsNullOrWhiteSpace(text))
        {
            return Ok(_predictionService.PredictSentiment(text));
        }

        return BadRequest("文本不能为空");
    }

    /// <summary>
    /// Analyses a batch of texts. Responds 400 when the list is missing or empty,
    /// or when it holds more than 100 entries.
    /// </summary>
    [HttpPost("predict-batch")]
    public ActionResult<List<SentimentResult>> PredictBatch([FromBody] BatchPredictRequest request)
    {
        var texts = request.Texts;

        if (texts is null or { Count: 0 })
        {
            return BadRequest("文本列表不能为空");
        }

        if (texts.Count > 100)
        {
            return BadRequest("单次最多处理100条文本");
        }

        return Ok(_predictionService.PredictBatch(texts));
    }
}

/// <summary>
/// Request body for the single-text prediction endpoint.
/// </summary>
public class PredictRequest
{
    public string Text { get; set; }
}

/// <summary>
/// Request body for the batch prediction endpoint.
/// </summary>
public class BatchPredictRequest
{
    public List<string> Texts { get; set; }
}

ONNX 模型集成

加载和推理 ONNX 模型

/// <summary>
/// ONNX model integration: runs a model exported from another framework inside ML.NET.
/// ONNX (Open Neural Network Exchange) is a cross-framework model format.
/// </summary>
/// <remarks>
/// The model is expected to take an "input" tensor of shape 1x3x224x224 and return an
/// "output" vector of 1000 class scores, as declared on <see cref="OnnxInput"/> and
/// <see cref="OnnxOutput"/>.
/// </remarks>
public class OnnxModelService
{
    // Number of floats in one input tensor: batch(1) * channels(3) * 224 * 224.
    private const int InputLength = 1 * 3 * 224 * 224;

    private readonly MLContext _mlContext;
    private readonly ITransformer _model;
    private readonly string[] _labels;

    /// <summary>Loads the ONNX model and the label file.</summary>
    /// <param name="modelPath">Path of the .onnx model file.</param>
    /// <param name="labelsPath">Path of a text file with one class label per line.</param>
    public OnnxModelService(string modelPath, string labelsPath)
    {
        _mlContext = new MLContext();

        // Describe the ONNX scoring step.
        var onnxPipeline = _mlContext.Transforms
            .ApplyOnnxModel(
                outputColumnNames: new[] { "output" },
                inputColumnNames: new[] { "input" },
                modelFile: modelPath);

        // An empty IDataView is enough to fit the pipeline: nothing is trained here.
        var emptyData = _mlContext.Data.LoadFromEnumerable(new List<OnnxInput>());
        _model = onnxPipeline.Fit(emptyData);

        // Load the label file.
        _labels = File.ReadAllLines(labelsPath);
    }

    /// <summary>
    /// ONNX input row.
    /// </summary>
    public class OnnxInput
    {
        [ColumnName("input")]
        [VectorType(1, 3, 224, 224)]  // image classification model: batch=1, channels=3, 224x224
        public float[] ImageData { get; set; }
    }

    /// <summary>
    /// ONNX output row.
    /// </summary>
    public class OnnxOutput
    {
        [ColumnName("output")]
        [VectorType(1, 1000)]  // scores for 1000 classes
        public float[] Probabilities { get; set; }
    }

    /// <summary>
    /// Runs inference on one image tensor and returns the highest-scoring class.
    /// </summary>
    /// <param name="imageData">Flattened 1x3x224x224 tensor (150528 floats).</param>
    /// <returns>
    /// The index of the top class, its score, and its label
    /// (or <c>Unknown_{index}</c> when the label file has no entry for that index).
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="imageData"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="imageData"/> has the wrong length.</exception>
    /// <exception cref="InvalidOperationException">The model returned no scores.</exception>
    public (int TopClass, float Confidence, string Label) Predict(float[] imageData)
    {
        if (imageData == null)
        {
            throw new ArgumentNullException(nameof(imageData));
        }

        if (imageData.Length != InputLength)
        {
            throw new ArgumentException(
                $"Expected {InputLength} values (1x3x224x224) but received {imageData.Length}.",
                nameof(imageData));
        }

        // A prediction engine must not be shared between threads, so one is created per call
        // and disposed as soon as the call finishes.
        using var predictor = _mlContext.Model.CreatePredictionEngine<OnnxInput, OnnxOutput>(_model);
        var output = predictor.Predict(new OnnxInput { ImageData = imageData });

        var scores = output.Probabilities;
        if (scores == null || scores.Length == 0)
        {
            throw new InvalidOperationException("The ONNX model returned no scores.");
        }

        // Single-pass argmax; on ties the first occurrence wins.
        var topIndex = 0;
        for (var i = 1; i < scores.Length; i++)
        {
            if (scores[i] > scores[topIndex])
            {
                topIndex = i;
            }
        }

        var confidence = scores[topIndex];
        var label = topIndex < _labels.Length ? _labels[topIndex] : $"Unknown_{topIndex}";

        return (topIndex, confidence, label);
    }
}

在 ASP.NET Core 中注册 ONNX 服务

// Program.cs
// Registers one OnnxModelService for the whole application; the model and label
// files are resolved from the "Models" folder under the content root.
builder.Services.AddSingleton<OnnxModelService>(serviceProvider =>
{
    var hostEnvironment = serviceProvider.GetRequiredService<IWebHostEnvironment>();
    var modelsDirectory = Path.Combine(hostEnvironment.ContentRootPath, "Models");

    return new OnnxModelService(
        Path.Combine(modelsDirectory, "resnet50.onnx"),
        Path.Combine(modelsDirectory, "labels.txt"));
});

完整示例：情感分析 Web API

/// <summary>
/// End-to-end sentiment analysis flow: prepare data, train, predict, and save the model.
/// </summary>
public class SentimentAnalysisPipeline
{
    /// <summary>
    /// Runs the whole flow and returns a task representing its outcome.
    /// </summary>
    /// <returns>
    /// A completed task on success, or a faulted task carrying the exception on failure.
    /// </returns>
    /// <remarks>
    /// All work here is synchronous, so the method is not marked <c>async</c> (an
    /// <c>async</c> method without <c>await</c> triggers compiler warning CS1998).
    /// Failures are still reported through the returned task rather than thrown at the call site.
    /// </remarks>
    public static Task RunAsync()
    {
        try
        {
            Run();
            return Task.CompletedTask;
        }
        catch (Exception ex)
        {
            return Task.FromException(ex);
        }
    }

    // Synchronous implementation of the train / predict / save flow.
    private static void Run()
    {
        var mlContext = new MLContext(seed: 42);

        // 1. Prepare the training data.
        var trainingData = new List<SentimentData>
        {
            new() { SentimentText = "这个产品非常好用，强烈推荐", Label = true },
            new() { SentimentText = "质量太差了，完全不推荐", Label = false },
            new() { SentimentText = "物流很快，包装完好", Label = true },
            new() { SentimentText = "客服态度恶劣，再也不买了", Label = false },
            new() { SentimentText = "性价比很高，值得购买", Label = true },
            new() { SentimentText = "描述不符，退货了", Label = false },
            new() { SentimentText = "使用体验很好，功能强大", Label = true },
            new() { SentimentText = "质量一般，不值这个价格", Label = false },
            new() { SentimentText = "非常喜欢，已经回购好几次了", Label = true },
            new() { SentimentText = "完全是浪费钱", Label = false },
        };

        var dataView = mlContext.Data.LoadFromEnumerable(trainingData);

        // 2. Build the pipeline: featurize the text, then SDCA logistic regression.
        var pipeline = mlContext.Transforms.Text
            .FeaturizeText("Features", nameof(SentimentData.SentimentText))
            .Append(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression());

        // 3. Train.
        var model = pipeline.Fit(dataView);

        // 4. Predict; the engine is disposed when this method returns.
        using var predictor = mlContext.Model.CreatePredictionEngine<SentimentData, SentimentPrediction>(model);

        var testTexts = new[]
        {
            "这个真的很好用",
            "太差了，别买",
            "还行吧，一般般",
            "超级棒！"
        };

        foreach (var text in testTexts)
        {
            var prediction = predictor.Predict(new SentimentData { SentimentText = text });
            Console.WriteLine($"{text,-20} => {(prediction.Prediction ? "正面" : "负面")} ({prediction.Probability:P1})");
        }

        // 5. Save the model together with its input schema.
        var modelPath = "Models/sentiment_model.zip";
        Directory.CreateDirectory("Models");
        mlContext.Model.Save(model, dataView.Schema, modelPath);
        Console.WriteLine($"\n模型已保存到：{modelPath}");
    }
}

优点

1.C# 原生开发 — 无需切换语言，与 .NET 项目无缝集成
2.AutoML 支持 — 自动选择算法和调参，降低入门门槛
3.跨框架集成 — 通过 ONNX 支持 PyTorch、TensorFlow 等模型
4.部署简单 — 训练好的模型直接序列化为 .zip 文件，部署到 ASP.NET Core
5.文档完善 — 微软官方提供丰富的教程和示例

缺点

1.GPU 支持有限 — 深度学习训练不如 PyTorch/TensorFlow 成熟
2.NLP 能力有限 — 中文文本处理不如 Python 生态丰富
3.社区规模小 — 相比 Python ML 社区，ML.NET 社区和第三方库较少
4.模型可解释性差 — 不如 Python 的 SHAP、LIME 等工具
5.前沿算法滞后 — 最新论文的算法实现通常先出现在 Python 生态中

ML.NET 为 .NET 开发者打开了一扇机器学习的大门。掌握数据加载（IDataView）、数据转换（FeaturizeText、NormalizeMinMax）、模型训练（SDCA、FastTree、K-Means）、模型评估（交叉验证）的完整流程，结合 AutoML 快速验证效果，通过 ONNX 集成深度学习模型，并部署到 ASP.NET Core Web API，可以在 C# 技术栈内完成端到端的机器学习应用开发。