迭代器与 yield 底层原理

SunnyFan · 大约 10 分钟 · 约 2865 字

迭代器与 yield 底层原理

简介

C# 的迭代器模式通过 yield return 语法糖实现，编译器自动生成实现 IEnumerable<T> 和 IEnumerator<T> 的状态机。理解迭代器的底层原理有助于编写高效的数据管道和避免常见陷阱。

特点

1.yield return — 编译器生成的状态机
2.延迟执行 — 按需计算，内存友好
3.迭代器状态 — 挂起与恢复机制
4.管道组合 — 多个迭代器链式处理
5.性能考量 — 分配与优化策略

yield return 原理

编译器生成代码

// 原始代码：
/// <summary>
/// Lazily yields the integers 0, 1, ..., <paramref name="count"/> - 1.
/// </summary>
/// <param name="count">Number of values to produce; zero or negative yields nothing.</param>
/// <returns>A deferred sequence; no value is computed until it is enumerated.</returns>
static IEnumerable<int> GetNumbers(int count)
{
    int next = 0;
    while (next < count)
    {
        // Execution is suspended here until the consumer asks for another value.
        yield return next;
        next++;
    }
}

// 编译器生成的等价代码（简化版）：
// Simplified sketch of the state-machine class behind GetNumbers(int count).
// The identifiers (<>1__state, <i>5__1, ...) are compiler-style names and are not
// legal in hand-written C#, so this listing is illustrative rather than compilable.
// NOTE(review): `count` and the one-argument constructor used below are not declared
// in this listing; presumably they were dropped by the simplification.
[CompilerGenerated]
private sealed class <GetNumbers>d__0 : IEnumerable<int>, IEnumerator<int>
{
    private int <>1__state;         // Resume point: 0 = start, 1 = after the yield, -1 = set while MoveNext runs
    private int <>l__initialThreadId;
    private int <i>5__1;            // The hoisted local variable i
    private int <>2__current;       // The value most recently yielded

    int IEnumerator<int>.Current => <>2__current;
    object IEnumerator.Current => <>2__current;

    // Reset is not supported by this listing; it always throws.
    void IEnumerator.Reset() => throw new NotSupportedException();

    // Runs the loop body up to the next yield. Returns true when a value was stored
    // in <>2__current, false when the loop condition failed or the state is unknown.
    bool IEnumerator.MoveNext()
    {
        switch (<>1__state)
        {
            case 0:
                // First call: initialise the loop variable.
                <>1__state = -1;
                <i>5__1 = 0;
                break;
            case 1:
                // Resuming after a yield: perform the loop increment.
                <>1__state = -1;
                <i>5__1++;
                break;
            default:
                // Any other state (including -1 after the loop ended) produces no more values.
                return false;
        }

        if (<i>5__1 < count) // Note: count has been lifted into a field
        {
            <>2__current = <i>5__1;
            <>1__state = 1;
            return true; // Suspension point
        }
        return false;
    }

    // Reuses this instance when called on the recorded thread while the state is -2;
    // otherwise creates a fresh state machine and copies count into it.
    IEnumerator<int> IEnumerable<int>.GetEnumerator()
    {
        if (<>l__initialThreadId == Environment.CurrentManagedThreadId && <>1__state == -2)
        {
            <>1__state = 0;
            return this;
        }
        return new <GetNumbers>d__0(0) { count = this.count };
    }
}

yield break 与异常处理

// yield break — 提前终止迭代
/// <summary>
/// Lazily yields every prime p with 2 &lt;= p &lt;= <paramref name="max"/>, in ascending order.
/// </summary>
/// <param name="max">Inclusive upper bound; values below 2 give an empty sequence.</param>
static IEnumerable<int> FindPrimes(int max)
{
    if (max < 2)
    {
        // Nothing to search: end the sequence without yielding anything.
        yield break;
    }

    int candidate = 2;
    while (candidate <= max)
    {
        if (IsPrime(candidate))
        {
            yield return candidate;
        }
        candidate++;
    }
}

/// <summary>
/// Tests whether <paramref name="n"/> is a prime number by trial division.
/// </summary>
/// <param name="n">Value to test; anything below 2 is not prime.</param>
/// <returns><c>true</c> when <paramref name="n"/> has no divisor other than 1 and itself.</returns>
static bool IsPrime(int n)
{
    if (n < 2) return false;
    if (n < 4) return true;          // 2 and 3
    if (n % 2 == 0) return false;    // Remaining even numbers

    // Only odd divisors up to sqrt(n) are needed. The bound is checked with integer
    // arithmetic (in long, so d * d cannot overflow) instead of re-evaluating
    // Math.Sqrt on every iteration.
    for (long d = 3; d * d <= n; d += 2)
    {
        if (n % d == 0) return false;
    }
    return true;
}

// yield + try-finally（确保清理）
/// <summary>
/// Lazily yields the lines of a text file, closing the reader when enumeration
/// finishes or the enumerator is disposed.
/// </summary>
/// <param name="path">Path of the file to read.</param>
static IEnumerable<string> ReadLines(string path)
{
    var reader = new StreamReader(path);
    try
    {
        while (true)
        {
            if (reader.EndOfStream)
            {
                yield break;
            }
            yield return reader.ReadLine()!;
        }
    }
    finally
    {
        // Runs on normal completion and when the iterator is disposed early.
        reader.Dispose();
    }
}

// 注意：yield return 不能出现在带有 catch 子句的 try 块中
// ❌ 错误
// try { yield return 1; }
// catch { } // 编译错误

// ✅ yield return 可以在 try-finally 的 try 块中使用
// ❌ yield return 不能在 catch 块或 finally 块中使用

延迟执行与管道

构建数据处理管道

// 迭代器管道：每个方法处理一个元素后立即传递
/// <summary>
/// Lazily yields the elements of <paramref name="source"/> for which
/// <paramref name="predicate"/> returns <c>true</c>, preserving order.
/// </summary>
static IEnumerable<T> Filter<T>(this IEnumerable<T> source, Func<T, bool> predicate)
{
    foreach (T candidate in source)
    {
        if (!predicate(candidate))
        {
            continue;
        }
        yield return candidate;
    }
}

/// <summary>
/// Lazily projects each element of <paramref name="source"/> through
/// <paramref name="selector"/>, one element at a time.
/// </summary>
static IEnumerable<TResult> Map<T, TResult>(this IEnumerable<T> source, Func<T, TResult> selector)
{
    foreach (T element in source)
    {
        TResult projected = selector(element);
        yield return projected;
    }
}

/// <summary>
/// Lazily yields at most the first <paramref name="count"/> elements of
/// <paramref name="source"/>.
/// </summary>
/// <param name="source">Sequence to read from.</param>
/// <param name="count">Maximum number of elements to yield; zero or negative yields nothing.</param>
/// <remarks>
/// The source is never advanced further than necessary: after the last requested
/// element has been yielded, no additional element is pulled. This matters for
/// expensive, blocking or side-effecting sources.
/// </remarks>
static IEnumerable<T> Take<T>(this IEnumerable<T> source, int count)
{
    if (count <= 0)
    {
        // Do not even start enumerating the source.
        yield break;
    }

    int remaining = count;
    foreach (var item in source)
    {
        yield return item;

        // Stop before the foreach asks the source for one more element.
        if (--remaining == 0)
        {
            yield break;
        }
    }
}

// 管道组合：处理无限序列
/// <summary>
/// Yields 0, 1, 2, ... without end; consumers must stop enumerating themselves.
/// </summary>
static IEnumerable<int> Naturals()
{
    for (int next = 0; ; next++)
    {
        yield return next;
    }
}

// Build the pipeline one stage at a time; nothing runs until the foreach below.
var evens = Naturals().Filter(n => n % 2 == 0);   // 0, 2, 4, 6, ...
var squares = evens.Map(n => n * n);              // 0, 4, 16, 36, ...
var result = squares.Take(5);                     // 0, 4, 16, 36, 64

foreach (var square in result)
{
    Console.WriteLine(square);
}
// Only the elements that are consumed get computed; the infinite sequence is never materialised.

惰性文件处理

// 处理大文件（逐行读取，不加载全部到内存）
/// <summary>
/// Lazily parses a pipe-delimited log file into <c>LogEntry</c> records, one line at a time.
/// </summary>
/// <param name="path">Path of the log file.</param>
/// <returns>
/// A deferred sequence of entries. Blank lines, lines starting with '#', and lines
/// with fewer than four '|'-separated fields are skipped.
/// </returns>
/// <exception cref="FormatException">The first field of a line is not a valid date/time.</exception>
static IEnumerable<LogEntry> ParseLogFile(string path)
{
    foreach (var line in File.ReadLines(path)) // File.ReadLines streams the file lazily
    {
        // The char overload compares ordinally; the string overload is culture-sensitive.
        if (string.IsNullOrWhiteSpace(line) || line.StartsWith('#'))
        {
            continue;
        }

        var parts = line.Split('|');
        if (parts.Length < 4)
        {
            continue;
        }

        yield return new LogEntry(
            // Log timestamps are machine-written, so parse them independently of the
            // current thread culture.
            DateTime.Parse(parts[0], System.Globalization.CultureInfo.InvariantCulture),
            parts[1].Trim(),
            parts[2].Trim(),
            parts[3].Trim()
        );
    }
}

// Pipeline-style log analysis: the ten sources with the most ERROR entries.
var errors =
    (from entry in ParseLogFile("app.log")
     where entry.Level == "ERROR"
     group entry by entry.Source into bySource
     select new { Source = bySource.Key, Count = bySource.Count() })
    .OrderByDescending(x => x.Count)
    .Take(10);

/// <summary>One parsed log line: timestamp, level, source and message text.</summary>
record LogEntry(
    DateTime Time,
    string Level,
    string Source,
    string Message);

迭代器陷阱

多次枚举与修改

// Trap 1: enumerating the same sequence more than once.
// NOTE(review): GetExpensiveItems is not shown; this assumes it returns a deferred sequence.
var items = GetExpensiveItems();
// First pass
var count = items.Count();    // Enumerates the sequence once
// Second pass
var first = items.First();    // Enumerates it again!

// Fix: cache the results
var cached = items.ToList();  // Enumerates once and keeps the results in a list

// Trap 2: modifying a collection while iterating over it.
var list = new List<int> { 1, 2, 3, 4, 5 };
// ❌ Removing from the list inside foreach over that same list
foreach (var item in list)
{
    if (item > 3) list.Remove(item); // InvalidOperationException
}

// ✅ Alternative A: collect the items to remove first, then remove them
var toRemove = list.Where(x => x > 3).ToList();
toRemove.ForEach(x => list.Remove(x));

// ✅ Alternative B: index-based loop running backwards, so removals do not shift
// the indices that are still to be visited
for (int i = list.Count - 1; i >= 0; i--)
{
    if (list[i] > 3) list.RemoveAt(i);
}

// 陷阱 3：yield 方法中的参数捕获
/// <summary>
/// Lazily yields each element of <paramref name="source"/> multiplied by two.
/// </summary>
/// <remarks>
/// The list is read when the result is enumerated, not when this method is called,
/// so changes made to the list in between are visible in the output.
/// </remarks>
IEnumerable<int> BuggyClosure(List<int> source)
{
    foreach (int value in source)
    {
        int doubled = value * 2;
        yield return doubled;
    }
}

var data = new List<int> { 1, 2, 3 };
var query = BuggyClosure(data); // Nothing is read from data yet
data.Add(4);        // Modify the source collection
data.AddRange([5, 6]);
foreach (var item in query) // Includes 4, 5 and 6 as well!
    Console.WriteLine(item);
// Output: 2, 4, 6, 8, 10, 12

迭代器的 Dispose 行为

/// <summary>
/// 迭代器的 Dispose 机制
/// </summary>

// 迭代器实现了 IDisposable
// foreach 自动调用 Dispose
// 手动使用时必须 Dispose

// 迭代器的 try-finally 与 Dispose 的关系
/// <summary>
/// Lazily yields the length of each line in a text file and disposes the reader
/// when enumeration completes or the enumerator is disposed.
/// </summary>
/// <param name="path">Path of the file to read.</param>
IEnumerable<int> ReadFileWithCleanup(string path)
{
    StreamReader reader = new(path);
    try
    {
        for (string? text = reader.ReadLine(); text is not null; text = reader.ReadLine())
        {
            yield return text.Length;
        }
    }
    finally
    {
        // Also runs when the consumer stops early and disposes the enumerator.
        reader.Dispose();
    }
}

// Disposing an iterator that has not been fully enumerated:
// foreach disposes the enumerator automatically when the loop is left with break
foreach (var len in ReadFileWithCleanup("test.txt"))
{
    if (len > 100)
        break; // The enumerator is disposed here, which closes the StreamReader
}

// When driving the enumerator by hand, Dispose must be called explicitly
var enumerator = ReadFileWithCleanup("test.txt").GetEnumerator();
try
{
    while (enumerator.MoveNext())
    {
        if (enumerator.Current > 100)
            break;
    }
}
finally
{
    enumerator.Dispose(); // Required: nothing else will dispose it
}

// The same thing, shortened with a using declaration
using var e = ReadFileWithCleanup("test.txt").GetEnumerator();
while (e.MoveNext())
{
    Console.WriteLine(e.Current);
}
// e is disposed automatically when it goes out of scope

// yield break 与 Dispose 的关系
// Demonstrates yield break ending a sequence part-way through the method body:
// the sequence contains the single value 1.
IEnumerable<int> EarlyReturn()
{
    Console.WriteLine("开始");
    yield return 1;
    Console.WriteLine("中间");
    yield break; // Ends the sequence here; this method has no try/finally, so no finally block is involved
    Console.WriteLine("不会执行"); // Deliberately unreachable: placed after yield break
}

性能优化

减少迭代器分配

// 迭代器方法会分配一个状态机对象（~100 bytes）
// 对于热路径，可以用手动迭代器避免

// 方式 1：返回 struct enumerator
/// <summary>
/// Struct enumerator over the half-open integer range [start, end).
/// </summary>
/// <remarks>
/// Use the struct type directly (for example in <c>foreach</c>, via
/// <see cref="GetEnumerator"/>) to avoid a heap allocation; converting it to
/// <see cref="IEnumerator{T}"/> boxes it. <see cref="Current"/> is only meaningful
/// after <see cref="MoveNext"/> has returned <c>true</c>.
/// </remarks>
public struct RangeEnumerator : IEnumerator<int>
{
    private readonly int _start, _end;
    private int _next;      // Next value to hand out
    private int _current;   // Value handed out by the last successful MoveNext

    /// <summary>Creates an enumerator over <paramref name="start"/> (inclusive) to <paramref name="end"/> (exclusive).</summary>
    public RangeEnumerator(int start, int end)
    {
        _start = start;
        _end = end;
        _next = start;      // No "start - 1" arithmetic, so int.MinValue cannot overflow
        _current = default;
    }

    /// <summary>The value produced by the most recent successful <see cref="MoveNext"/>.</summary>
    public int Current => _current;

    object IEnumerator.Current => _current;

    /// <summary>Returns a copy of this enumerator so the struct can be used directly in <c>foreach</c>.</summary>
    public RangeEnumerator GetEnumerator() => this;

    /// <summary>
    /// Advances to the next value. Once the range is exhausted, further calls keep
    /// returning <c>false</c> and leave <see cref="Current"/> unchanged.
    /// </summary>
    public bool MoveNext()
    {
        if (_next >= _end)
        {
            return false;
        }

        // _next < _end <= int.MaxValue, so the increment cannot overflow.
        _current = _next++;
        return true;
    }

    /// <summary>Rewinds the enumerator to just before the first value.</summary>
    public void Reset()
    {
        _next = _start;
        _current = default;
    }

    /// <summary>No resources are held; nothing to release.</summary>
    public void Dispose() { }
}

// 方式 2：使用 CollectionsMarshal.AsSpan
// List<T> 可以转为 Span<T>，直接遍历底层数组，省去枚举器的版本检查开销（遍历期间不得增删元素）
/// <summary>
/// Passes every element of <paramref name="list"/> to <c>Process</c>, iterating over
/// a span view of the list obtained from <c>CollectionsMarshal.AsSpan</c>.
/// </summary>
void ProcessList(List<int> list)
{
    // Iterates the span view directly; no List<T> enumerator is involved.
    foreach (int element in CollectionsMarshal.AsSpan(list))
    {
        Process(element);
    }
}

// 方式 3：直接使用索引 for 循环
// Passes every element of the array to Process using a plain index-based loop.
void ProcessArray(int[] array)
{
    // Per the original author's note: a for loop bounded by array.Length is
    // optimised by the JIT (bounds checks eliminated).
    for (int i = 0; i < array.Length; i++)
    {
        Process(array[i]);
    }
}

IAsyncEnumerable 与异步迭代

/// <summary>
/// C# 8+ 异步迭代器 — yield return 的异步版本
/// </summary>

// 异步迭代器方法
/// <summary>
/// Asynchronously streams the lines of a text file.
/// </summary>
/// <param name="path">Path of the file to read.</param>
/// <returns>An async sequence of lines; the reader is disposed when enumeration ends.</returns>
async IAsyncEnumerable<string> ReadLinesAsync(string path)
{
    using var reader = new StreamReader(path);

    // Loop until ReadLineAsync returns null instead of polling EndOfStream:
    // EndOfStream may perform a synchronous, blocking read, which defeats the
    // purpose of an async iterator.
    while (await reader.ReadLineAsync() is { } line)
    {
        yield return line;
    }
}

// Consuming an async iterator: each line is awaited and printed as it arrives
await foreach (var line in ReadLinesAsync("largefile.txt"))
{
    Console.WriteLine(line);
}

// 带取消令牌的异步迭代器
/// <summary>
/// Asynchronously yields 0 .. <paramref name="count"/> - 1, waiting 100 ms before each value.
/// </summary>
/// <param name="count">Number of values to produce.</param>
/// <param name="ct">
/// Cancellation token; also receives the token passed through <c>WithCancellation</c>
/// because of the <c>[EnumeratorCancellation]</c> attribute.
/// </param>
/// <exception cref="OperationCanceledException">The token was cancelled.</exception>
async IAsyncEnumerable<int> GenerateNumbersAsync(
    int count,
    [EnumeratorCancellation] CancellationToken ct = default)
{
    int produced = 0;
    while (produced < count)
    {
        ct.ThrowIfCancellationRequested();
        await Task.Delay(100, ct);
        yield return produced;
        produced++;
    }
}

// Supplying the cancellation token at the consumption site with WithCancellation
// NOTE(review): cancellationToken is assumed to be defined by the surrounding code.
await foreach (var num in GenerateNumbersAsync(100)
    .WithCancellation(cancellationToken))
{
    Console.WriteLine(num);
}

// 异步迭代器的管道组合
/// <summary>
/// Asynchronously yields the elements of <paramref name="source"/> that satisfy
/// <paramref name="predicate"/>, preserving order.
/// </summary>
async IAsyncEnumerable<T> FilterAsync<T>(
    IAsyncEnumerable<T> source,
    Func<T, bool> predicate)
{
    await foreach (T candidate in source)
    {
        if (!predicate(candidate))
        {
            continue;
        }
        yield return candidate;
    }
}

/// <summary>
/// Asynchronously projects each element of <paramref name="source"/> through the
/// async <paramref name="selector"/>, awaiting each result before yielding it.
/// </summary>
async IAsyncEnumerable<TResult> MapAsync<TSource, TResult>(
    IAsyncEnumerable<TSource> source,
    Func<TSource, Task<TResult>> selector)
{
    await foreach (TSource element in source)
    {
        TResult projected = await selector(element);
        yield return projected;
    }
}

// Compose the async pipeline: keep only the lines that start with "ERROR:".
// The prefix is a fixed machine-readable marker, so it is compared ordinally
// rather than with the current culture.
var results = FilterAsync(
    ReadLinesAsync("data.txt"),
    line => line.StartsWith("ERROR:", StringComparison.Ordinal));

迭代器的调试技巧

/// <summary>
/// 调试 yield 方法的技巧
/// </summary>

// 1. 调试器在 yield 处的行为
// yield 方法中的断点会在每次 MoveNext 时触发
// 这意味着断点可能被触发多次

// 2. 查看迭代器状态
/// <summary>
/// Yields 0 through 9, logging to the console immediately before each value is
/// yielded and again when execution resumes after it.
/// </summary>
IEnumerable<int> GetNumbers()
{
    int i = 0;
    while (i < 10)
    {
        Console.WriteLine($"Yielding {i}");
        yield return i;

        // Only reached when the consumer asks for the next element.
        Console.WriteLine($"Resumed after {i}");
        i++;
    }
}

// 3. 使用 Materialize 避免延迟执行问题
// 将延迟执行的迭代器立即执行并缓存
/// <summary>
/// Helpers for turning deferred sequences into cached ones.
/// </summary>
public static class IteratorExtensions
{
    /// <summary>Enumerates <paramref name="source"/> immediately and returns the results as a list.</summary>
    public static List<T> Materialize<T>(this IEnumerable<T> source)
    {
        return source.ToList();
    }

    /// <summary>Simple eager cache: enumerates <paramref name="source"/> once, right away.</summary>
    public static IReadOnlyList<T> Memoize<T>(this IEnumerable<T> source)
    {
        return source.ToList();
    }

    /// <summary>
    /// Lazy cache: nothing is read from <paramref name="source"/> until the result is
    /// used, each source element is read at most once, and elements already read are
    /// served from the cache on later enumerations.
    /// </summary>
    /// <remarks>
    /// <c>Count</c> forces the remainder of the source to be read. The returned list is
    /// not thread-safe. Because reading is deferred, changes to the underlying source
    /// made before an element is first reached are observed.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public static IReadOnlyList<T> MemoizeLazy<T>(this IEnumerable<T> source)
    {
        if (source is null)
        {
            throw new ArgumentNullException(nameof(source));
        }
        return new LazyCache<T>(source);
    }

    /// <summary>Read-only list that pulls from its source on demand and remembers what it has read.</summary>
    private sealed class LazyCache<TItem> : IReadOnlyList<TItem>
    {
        private readonly List<TItem> _cache = new();
        private readonly IEnumerable<TItem> _source;
        private IEnumerator<TItem>? _cursor;   // Created on first use, released when exhausted
        private bool _exhausted;

        public LazyCache(IEnumerable<TItem> source)
        {
            _source = source;
        }

        /// <summary>Number of elements; reads the rest of the source to find out.</summary>
        public int Count
        {
            get
            {
                while (TryFetchNext())
                {
                }
                return _cache.Count;
            }
        }

        /// <summary>Element at <paramref name="index"/>; reads the source only as far as needed.</summary>
        public TItem this[int index]
        {
            get
            {
                if (index < 0)
                {
                    throw new ArgumentOutOfRangeException(nameof(index));
                }
                while (_cache.Count <= index && TryFetchNext())
                {
                }
                if (index >= _cache.Count)
                {
                    throw new ArgumentOutOfRangeException(nameof(index));
                }
                return _cache[index];
            }
        }

        public IEnumerator<TItem> GetEnumerator()
        {
            // Serve cached elements first, then extend the cache one element at a time.
            for (int i = 0; i < _cache.Count || TryFetchNext(); i++)
            {
                yield return _cache[i];
            }
        }

        System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
        {
            return GetEnumerator();
        }

        /// <summary>Reads one more element from the source into the cache, if any remain.</summary>
        private bool TryFetchNext()
        {
            if (_exhausted)
            {
                return false;
            }

            _cursor ??= _source.GetEnumerator();
            if (_cursor.MoveNext())
            {
                _cache.Add(_cursor.Current);
                return true;
            }

            // Source finished: release it so it is never enumerated again.
            _cursor.Dispose();
            _cursor = null;
            _exhausted = true;
            return false;
        }
    }
}

// 4. Verifying an iterator's output with SequenceEqual
// NOTE(review): GetEvenNumbers and Assert are not defined in this article; presumably
// an iterator under test and a test-framework assertion class.
var expected = new[] { 2, 4, 6, 8, 10 };
var actual = GetEvenNumbers().Take(5);
Assert.True(expected.SequenceEqual(actual));

优点

1.延迟计算 — 只在需要时生成数据
2.内存友好 — 处理无限序列和大文件
3.管道组合 — 声明式数据处理
4.代码简洁 — 编译器自动生成复杂状态机

缺点

1.分配开销 — 每个迭代器方法分配状态机对象
2.多次枚举 — 同一查询可能被执行多次
3.调试困难 — yield 断点行为与普通代码不同
4.异常时机 — 迭代器方法中的异常延迟到遍历时

yield return 编译器生成实现 IEnumerable<T>/IEnumerator<T> 的状态机类，通过 state 字段管理挂起和恢复。迭代器天然支持延迟执行和管道组合——数据逐个元素流过管道。yield break 提前终止，try-finally 确保资源清理。常见陷阱：多次枚举同一查询、迭代中修改集合、闭包中参数捕获时机。性能优化：热路径用 struct enumerator、CollectionsMarshal.AsSpan 或直接 for 循环替代迭代器。