BCL 源码解读

SunnyFan大约 12 分钟约 3696 字

BCL 源码解读

简介

.NET 基础类库（BCL，Base Class Library）是 .NET 的核心类库，包含集合、IO、线程、LINQ 等基础类型。通过阅读 BCL 源码，可以学习优秀的设计模式、理解内部实现原理、提升编码水平。.NET Core 之后 BCL 完全开源，源码仓库见 github.com/dotnet/runtime。

特点

1.集合源码 — List/Dictionary/HashSet 实现
2.LINQ 源码 — Where/Select/OrderBy 实现
3.Task 源码 — 异步基础设施
4.Span 源码 — 内存安全抽象
5.设计模式 — BCL 中的经典模式

`List<T>` 源码解读

动态数组实现

// List<T> 核心实现（简化版）
public class List<T>
{
    private T[] _items;           // 内部数组
    private int _size;            // 实际元素数量
    private int _version;         // 版本号（检测并发修改）

    public List(int capacity = 0)
    {
        _items = capacity == 0 ? Array.Empty<T>() : new T[capacity];
        _size = 0;
    }

    public int Count => _size;
    public int Capacity
    {
        get => _items.Length;
        set
        {
            if (value < _size)
                throw new ArgumentOutOfRangeException();
            if (value != _items.Length)
            {
                if (value > 0)
                {
                    var newItems = new T[value];
                    if (_size > 0)
                        Array.Copy(_items, newItems, _size);
                    _items = newItems;
                }
                else
                {
                    _items = Array.Empty<T>();
                }
            }
        }
    }

    public void Add(T item)
    {
        _version++; // 修改版本号
        T[] array = _items;
        int size = _size;
        if ((uint)size < (uint)array.Length)
        {
            _size = size + 1;
            array[size] = item; // 直接赋值（有空间时）
        }
        else
        {
            AddWithResize(item); // 需要扩容
        }
    }

    private void Grow(int capacity)
    {
        int newCapacity = 2 * _items.Length; // 2 倍扩容
        if ((uint)newCapacity > Array.MaxLength) newCapacity = Array.MaxLength;
        if (newCapacity < capacity) newCapacity = capacity;
        Capacity = newCapacity;
    }

    // 关键优化点：
    // 1. 初始为 Array.Empty<T>()（避免为空列表分配数组）
    // 2. Add 有 fast path（不扩容时无额外开销）
    // 3. 2 倍扩容保证均摊 O(1)
    // 4. _version 检测 foreach 期间的修改
}

`Dictionary<TKey, TValue>` 源码解读

哈希表实现

// Dictionary 核心数据结构
public class Dictionary<TKey, TValue>
{
    private struct Entry
    {
        public uint hashCode;    // 哈希值（高位1表示已使用）
        public int next;         // 冲突链的下一个索引（-1表示末尾）
        public TKey key;
        public TValue value;
    }

    private int[] _buckets;      // 桶数组（存储链头索引）
    private Entry[] _entries;    // 条目数组（连续存储）
    private int _count;
    private int _freeList;       // 空闲链表头（已删除的条目）
    private int _freeCount;

    // 查找过程
    public TValue this[TKey key]
    {
        get
        {
            uint hashCode = (uint)key.GetHashCode();
            int bucketIndex = hashCode % (uint)_buckets.Length;
            int entriesIndex = _buckets[bucketIndex] - 1; // 桶中存储的是 +1 索引

            while (entriesIndex >= 0)
            {
                ref Entry entry = ref _entries[entriesIndex];
                if (entry.hashCode == hashCode && EqualityComparer<TKey>.Default.Equals(entry.key, key))
                    return entry.value;
                entriesIndex = entry.next; // 沿冲突链查找
            }

            throw new KeyNotFoundException();
        }
    }

    // 插入过程
    public void Add(TKey key, TValue value)
    {
        uint hashCode = (uint)key.GetHashCode();
        int bucketIndex = hashCode % (uint)_buckets.Length;

        // 检查是否已存在
        int entriesIndex = _buckets[bucketIndex] - 1;
        while (entriesIndex >= 0)
        {
            ref Entry entry = ref _entries[entriesIndex];
            if (entry.hashCode == hashCode && EqualityComparer<TKey>.Default.Equals(entry.key, key))
                throw new ArgumentException("键已存在");
            entriesIndex = entry.next;
        }

        // 获取空闲位置
        int index;
        if (_freeCount > 0)
        {
            index = _freeList;
            _freeList = _entries[index].next;
            _freeCount--;
        }
        else
        {
            if (_count == _entries.Length) Resize();
            index = _count;
        }

        // 写入条目
        ref Entry newEntry = ref _entries[index];
        newEntry.hashCode = hashCode;
        newEntry.key = key;
        newEntry.value = value;
        newEntry.next = _buckets[bucketIndex] - 1; // 头插法
        _buckets[bucketIndex] = index + 1;
        _count++;
    }

    // 关键设计点：
    // 1. 桶和条目分离：桶存链头索引，条目连续存储
    // 2. 头插法：新条目插到链头（O(1)）
    // 3. 空闲链表：删除的条目被复用
    // 4. +1 索引：0 表示空桶（避免 -1 的额外检查）
    // 5. ref struct 优化：使用 ref 减少拷贝
}

LINQ Where/Select 源码

延迟执行实现

// LINQ Where 的核心实现（.NET 8）
public static IEnumerable<TSource> Where<TSource>(
    this IEnumerable<TSource> source, Func<TSource, bool> predicate)
{
    // 优化：使用 Iterator<TSource> 基类
    return new WhereIterator<TSource>(source, predicate);
}

private sealed class WhereIterator<TSource> : Iterator<TSource>
{
    private readonly IEnumerable<TSource> _source;
    private readonly Func<TSource, bool> _predicate;
    private IEnumerator<TSource>? _enumerator;

    public override bool MoveNext()
    {
        switch (_state)
        {
            case 0: // 初始化
                _enumerator = _source.GetEnumerator();
                _state = 1;
                goto case 1;
            case 1: // 遍历
                while (_enumerator!.MoveNext())
                {
                    TSource item = _enumerator.Current;
                    if (_predicate(item))
                    {
                        _current = item;
                        return true;
                    }
                }
                Dispose(); // 遍历结束
                return false;
        }
        return false;
    }

    // 优化：如果 source 是数组或 List，使用特化版本
    // WhereArrayIterator 直接用 for 循环（无 GetEnumerator 开销）
    // WhereListIterator 直接用 List.Enumerator（struct，无分配）
}

// Select 的类似实现
private sealed class SelectIterator<TSource, TResult> : Iterator<TResult>
{
    private readonly IEnumerable<TSource> _source;
    private readonly Func<TSource, TResult> _selector;
    private IEnumerator<TSource>? _enumerator;

    public override bool MoveNext()
    {
        if (_state == 0)
        {
            _enumerator = _source.GetEnumerator();
            _state = 1;
        }

        if (_enumerator!.MoveNext())
        {
            _current = _selector(_enumerator.Current);
            return true;
        }

        Dispose();
        return false;
    }
}

// BCL 优化策略：
// 1. 特化迭代器：数组、List、IList 有专用实现
// 2. 融合优化：Where + Select 合并为一个迭代器
// 3. 结构体枚举器：List<T>.Enumerator 是 struct

`Span<T>` 源码解读

内存安全抽象

// Span<T> 是 ref struct，只能存在于栈上
public readonly ref struct Span<T>
{
    // 内部引用（ByReference<T> 或指针）
    internal readonly ByReference<T> _reference;
    private readonly int _length;

    // 从数组创建
    public Span(T[] array)
    {
        _reference = new ByReference<T>(ref MemoryMarshal.GetArrayDataReference(array));
        _length = array.Length;
    }

    // 从数组切片
    public Span(T[] array, int start, int length)
    {
        _reference = new ByReference<T>(ref array[start]);
        _length = length;
    }

    // 索引器（无边界检查的高性能版本）
    public ref T this[int index]
    {
        get
        {
            // InternalThrow 在 Release 下被 JIT 消除
            if ((uint)index >= (uint)_length)
                ThrowHelper.ThrowIndexOutOfRangeException();
            return ref Unsafe.Add(ref _reference.Value, index);
        }
    }

    // 切片（零分配）
    public Span<T> Slice(int start, int length)
    {
        return new Span<T>(
            ref Unsafe.Add(ref _reference.Value, start),
            length);
    }

    // 关键安全保证：
    // 1. ref struct → 不能逃逸到堆上
    // 2. 不能作为 class 字段
    // 3. 不能在 async 方法中使用
    // 4. 不能被 lambda 捕获
    // → 确保 Span 指向的内存在使用期间有效
}

`ConcurrentDictionary<TKey, TValue>` 源码解读

并发安全的哈希表

// ConcurrentDictionary 使用分段锁（Striped Lock）实现并发安全
// .NET 6+ 优化为更细粒度的锁策略

public class ConcurrentDictionary<TKey, TValue>
{
    private readonly object[] _locks;       // 分段锁数组
    private readonly VolatileRectangle[] _buckets; // 桶数组
    private readonly Node[] _nodes;         // 节点数组

    // 核心设计：
    // 1. 多个锁 — 默认为 Environment.ProcessorCount 个锁
    //    不同桶可能使用不同的锁，减少锁竞争
    // 2. TryGetValue 无锁读取 — 使用 Volatile.Read 保证可见性
    // 3. TryAdd 使用锁 — 写入时只锁定对应的段

    // 无锁读取（高性能路径）
    public bool TryGetValue(TKey key, [MaybeNullWhen(false)] out TValue value)
    {
        // 1. 计算哈希
        int hashcode = comparer.GetHashCode(key);

        // 2. 查找桶（使用 Volatile.Read 保证读到最新值）
        Node? node = Volatile.Read(ref GetBucket(hashcode));

        // 3. 遍历链表（无锁）
        while (node != null)
        {
            if (node.Hashcode == hashcode && comparer.Equals(node.Key, key))
            {
                value = node.Value;
                return true;
            }
            node = node.Next;
        }

        value = default;
        return false;
    }

    // 写入（加锁路径）
    public bool TryAdd(TKey key, TValue value)
    {
        int hashcode = comparer.GetHashCode(key);
        // 获取该桶对应的锁
        object lockObj = _locks[GetLockIndex(hashcode)];

        lock (lockObj)
        {
            // 再次检查（double-check）
            // 插入新节点
            // 更新桶引用（使用 Volatile.Write 保证可见性）
            Volatile.Write(ref GetBucket(hashcode), newNode);
            return true;
        }
    }

    // 关键优化点：
    // 1. 读操作完全无锁 — 适合读多写少场景
    // 2. 写操作只锁定对应段 — 减少锁竞争
    // 3. 使用 Volatile.Read/Write 替代 lock — 读性能接近普通 Dictionary
    // 4. GetOrAdd / AddOrUpdate 支持工厂方法，避免多余的对象创建
}

`Channel<T>` 源码解读

异步生产者-消费者模型

// Channel<T> 是 .NET Core 3.0+ 引入的高性能异步通道
// 核心实现基于 ConcurrentQueue + AsyncWaitHandle

// 有界通道（BoundedChannel）
public static Channel<T> CreateBounded<T>(int capacity)
{
    // 内部使用 ConcurrentQueue<T> 存储数据
    // 使用 SemaphoreSlim 控制写入（满时等待）
    // 使用 TaskCompletionSource 控制读取（空时等待）
}

// 无界通道（UnboundedChannel）
public static Channel<T> CreateUnbounded<T>()
{
    // 内部使用 ConcurrentQueue<T> 存储数据
    // 写入永不阻塞
    // 读取时如果队列为空，等待 TaskCompletionSource 信号
}

// 核心读取方法（简化版）
public async ValueTask<T> ReadAsync(CancellationToken ct)
{
    // 1. 尝试从队列中取出（无锁）
    if (_queue.TryDequeue(out T? item))
        return item;

    // 2. 队列为空，创建等待任务
    var tcs = new TaskCompletionSource<T>();
    _readers.Enqueue(tcs);

    // 3. 再次检查（避免竞态条件）
    if (_queue.TryDequeue out item))
    {
        // 有新数据了，取消等待
        _readers.TryDequeue(out _);
        return item;
    }

    // 4. 真正等待
    return await tcs.Task.WaitAsync(ct);
}

// 核心写入方法（简化版）
public bool TryWrite(T item)
{
    // 1. 写入队列
    _queue.Enqueue(item);

    // 2. 唤醒等待的读者
    if (_readers.TryDequeue(out var tcs))
    {
        tcs.TrySetResult(item);
    }

    return true;
}

// 性能优势：
// 1. 无锁读取路径 — 数据可用时无 async 开销
// 2. 仅在等待时分配 Task — 减少内存分配
// 3. 支持取消和超时 — 与 CancellationToken 集成
// 4. 支持多读者多写者 — 线程安全

BCL 中的设计模式

经典模式应用

// 1. 策略模式 — IEqualityComparer<T>
// Dictionary 的比较逻辑通过接口注入
public class Dictionary<TKey, TValue>
{
    private readonly IEqualityComparer<TKey> _comparer;

    public Dictionary(IEqualityComparer<TKey>? comparer)
    {
        _comparer = comparer ?? EqualityComparer<TKey>.Default;
    }
}

// 2. 工厂模式 — TaskCreationOptions
// Task.Factory 使用选项控制创建行为
Task.Factory.StartNew(action, cancellationToken,
    TaskCreationOptions.LongRunning, TaskScheduler.Default);

// 3. 观察者模式 — IObservable<T>/IObserver<T>
// 反应式扩展的基础
public interface IObservable<T>
{
    IDisposable Subscribe(IObserver<T> observer);
}

// 4. 迭代器模式 — IEnumerable<T>/IEnumerator<T>
// foreach 和 LINQ 的基础

// 5. 适配器模式 — StreamReader/StreamWriter
// 适配 Stream 为文本读写

// 6. 装饰器模式 — BufferedStream
// 包装 Stream 添加缓冲功能
var buffered = new BufferedStream(new FileStream("data.bin", FileMode.Open));

// 7. 模板方法 — Stream 抽象类
public abstract class Stream
{
    public abstract int Read(byte[] buffer, int offset, int count);
    public abstract void Write(byte[] buffer, int offset, int count);

    // 模板方法提供默认实现
    public virtual int ReadByte() { /* 使用 Read */ }
    public virtual void WriteByte(byte value) { /* 使用 Write */ }
}

// 8. 享元模式 — String.Empty / Array.Empty<T>()
// 共享不可变实例减少分配

BCL 中的性能优化技巧

内存分配优化

// 1. Array.Empty<T>() — 避免为空数组分配内存
// 不好的写法
var empty1 = new int[0]; // 每次调用都分配新数组
var empty2 = new string[0];

// 好的写法 — 使用缓存的空数组
var empty3 = Array.Empty<int>();    // 进程内只分配一次
var empty4 = Array.Empty<string>();

// 2. string.Create — 减少中间字符串分配
// 不好的写法
string hex1 = BitConverter.ToString(bytes).Replace("-", "");

// 好的写法
string hex2 = string.Create(bytes.Length * 2, bytes, (span, b) =>
{
    for (int i = 0; i < b.Length; i++)
    {
        var hex = b[i].ToString("X2");
        span[i * 2] = hex[0];
        span[i * 2 + 1] = hex[1];
    }
});

// 3. ValueTask — 避免同步完成时的 Task 分配
// 不好的写法（同步完成也分配 Task）
Task<int> GetValueAsync() => Task.FromResult(42);

// 好的写法（同步完成无分配）
ValueTask<int> GetValueAsync() => new ValueTask<int>(42);

// 4. ObjectPool<T> — 复用对象减少 GC 压力
var pool = new DefaultObjectPool<StringBuilder>(new StringBuilderPooledObjectPolicy());
var sb = pool.Get();
try
{
    sb.Append("Hello");
    sb.Append(" World");
    return sb.ToString();
}
finally
{
    sb.Clear();
    pool.Return(sb); // 归还到池中复用
}

集合操作性能对比

// 不同集合操作的时间复杂度对比：

// | 操作             | List<T>  | Dictionary<K,V> | HashSet<T> | SortedSet<T> |
// |-----------------|----------|-----------------|------------|--------------|
// | 按索引访问       | O(1)     | -               | -          | -            |
// | 按键查找         | O(n)     | O(1) 平均       | O(1) 平均  | O(log n)     |
// | 添加             | O(1) 均摊| O(1) 均摊      | O(1) 均摊  | O(log n)     |
// | 删除             | O(n)     | O(1) 平均       | O(1) 平均  | O(log n)     |
// | 排序             | O(nlogn) | -               | -          | 已排序        |
// | 内存开销         | 低       | 中              | 中         | 高           |

// List<T> 的 Capacity 调优
var list = new List<int>(1000); // 预分配容量，避免多次扩容
// 默认容量为 0，第一次 Add 分配 4，之后 2 倍扩容
// 如果知道大致数量，预分配可以避免：
// 1. 多次数组分配和复制
// 2. 内存碎片
// 3. GC 压力

// Dictionary 的初始容量
var dict = new Dictionary<string, int>(100); // 预分配桶大小
// Dictionary 的默认初始容量为 3（素数），负载因子为 0.72
// 超过容量 * 0.72 时自动扩容（分配新数组 + 重新哈希）
// 预分配可以避免重新哈希的开销

优点

1.学习设计 — BCL 是 .NET 最佳实践的集合
2.性能优化 — 学习高性能数据结构实现
3.开源透明 — 可以直接查看和调试源码
4.模式参考 — 经典设计模式的实际应用

缺点

1.代码量大 — runtime 仓库超过百万行代码
2.平台特殊 — 大量 unsafe 和平台相关代码
3.复杂优化 — 性能优化代码可读性低
4.版本差异 — 不同版本实现不同

BCL 源码是学习 .NET 最佳实践的宝库。List<T> 使用 2 倍扩容和 Array.Empty<T>() 优化。Dictionary 使用桶+条目分离结构，头插法处理冲突，空闲链表复用删除条目。LINQ 通过特化迭代器（数组/List 专用）和迭代器融合优化性能。Span<T> 使用 ref struct 确保栈上安全，Unsafe.Add 实现高性能指针算术。BCL 中大量应用了策略、工厂、观察者、迭代器、装饰器等经典设计模式。