Python 算法与数据结构

SunnyFan大约 11 分钟约 3443 字

Python 算法与数据结构

简介

算法与数据结构是编程的基础能力，直接影响代码的执行效率和可扩展性。Python 标准库提供了丰富的内置数据结构（list、dict、set、deque 等），同时可以自定义实现树、图、堆等高级结构。掌握常用算法模式和数据结构选择，是写出高效 Python 代码的前提。

特点

1.内置丰富 — Python 标准库提供 list、dict、set、deque、heapq 等高效结构
2.表达力强 — Python 语法简洁，算法实现可读性高
3.标准库工具 — bisect、heapq、itertools、functools 等模块提供常用算法
4.性能权衡 — Python 适合算法原型验证，性能关键场景可配合 C 扩展
5.面试必备 — 算法与数据结构是技术面试的核心考察内容

实现

核心数据结构实现

from typing import Any, Optional
from collections import deque
import heapq

# 1. 链表
class ListNode:
    def __init__(self, val: Any, next_node=None):
        self.val = val
        self.next = next_node

class LinkedList:
    """单链表"""
    def __init__(self):
        self.head: Optional[ListNode] = None
        self.size = 0

    def append(self, val: Any) -> None:
        new_node = ListNode(val)
        if not self.head:
            self.head = new_node
        else:
            current = self.head
            while current.next:
                current = current.next
            current.next = new_node
        self.size += 1

    def prepend(self, val: Any) -> None:
        self.head = ListNode(val, self.head)
        self.size += 1

    def delete(self, val: Any) -> bool:
        if not self.head:
            return False
        if self.head.val == val:
            self.head = self.head.next
            self.size -= 1
            return True
        current = self.head
        while current.next:
            if current.next.val == val:
                current.next = current.next.next
                self.size -= 1
                return True
            current = current.next
        return False

    def reverse(self) -> None:
        prev, current = None, self.head
        while current:
            next_node = current.next
            current.next = prev
            prev = current
            current = next_node
        self.head = prev

    def to_list(self) -> list:
        result = []
        current = self.head
        while current:
            result.append(current.val)
            current = current.next
        return result

# 2. 二叉搜索树
class TreeNode:
    def __init__(self, val: int):
        self.val = val
        self.left: Optional["TreeNode"] = None
        self.right: Optional["TreeNode"] = None

class BST:
    """二叉搜索树"""
    def __init__(self):
        self.root: Optional[TreeNode] = None

    def insert(self, val: int) -> None:
        self.root = self._insert(self.root, val)

    def _insert(self, node: Optional[TreeNode], val: int) -> TreeNode:
        if not node:
            return TreeNode(val)
        if val < node.val:
            node.left = self._insert(node.left, val)
        elif val > node.val:
            node.right = self._insert(node.right, val)
        return node

    def search(self, val: int) -> bool:
        return self._search(self.root, val)

    def _search(self, node: Optional[TreeNode], val: int) -> bool:
        if not node:
            return False
        if val == node.val:
            return True
        return self._search(node.left if val < node.val else node.right, val)

    def inorder(self) -> list[int]:
        """中序遍历（有序输出）"""
        result = []
        self._inorder(self.root, result)
        return result

    def _inorder(self, node: Optional[TreeNode], result: list) -> None:
        if node:
            self._inorder(node.left, result)
            result.append(node.val)
            self._inorder(node.right, result)

# 3. LRU 缓存
class LRUCache:
    """LRU 缓存（Python 标准库实现）"""
    def __init__(self, capacity: int):
        from collections import OrderedDict
        self.cache = OrderedDict()
        self.capacity = capacity

    def get(self, key: int) -> int:
        if key not in self.cache:
            return -1
        self.cache.move_to_end(key)
        return self.cache[key]

    def put(self, key: int, value: int) -> None:
        if key in self.cache:
            self.cache.move_to_end(key)
        self.cache[key] = value
        if len(self.cache) > self.capacity:
            self.cache.popitem(last=False)

# 使用
lru = LRUCache(3)
lru.put(1, 100)
lru.put(2, 200)
lru.put(3, 300)
lru.get(1)        # 100，key 1 变为最近使用
lru.put(4, 400)   # 淘汰 key 2（最久未使用）
print(lru.get(2)) # -1（已淘汰）

排序与搜索算法

from typing import List, Any
import bisect

# 1. 排序算法

def merge_sort(arr: List[int]) -> List[int]:
    """归并排序 - O(n log n)"""
    if len(arr) <= 1:
        return arr
    mid = len(arr) // 2
    left = merge_sort(arr[:mid])
    right = merge_sort(arr[mid:])
    return _merge(left, right)

def _merge(left: List[int], right: List[int]) -> List[int]:
    result = []
    i = j = 0
    while i < len(left) and j < len(right):
        if left[i] <= right[j]:
            result.append(left[i])
            i += 1
        else:
            result.append(right[j])
            j += 1
    result.extend(left[i:])
    result.extend(right[j:])
    return result

def quick_sort(arr: List[int]) -> List[int]:
    """快速排序 - O(n log n) 平均"""
    if len(arr) <= 1:
        return arr
    pivot = arr[len(arr) // 2]
    left = [x for x in arr if x < pivot]
    middle = [x for x in arr if x == pivot]
    right = [x for x in arr if x > pivot]
    return quick_sort(left) + middle + quick_sort(right)

# 2. 搜索算法

def binary_search(arr: List[int], target: int) -> int:
    """二分查找 - O(log n)"""
    left, right = 0, len(arr) - 1
    while left <= right:
        mid = (left + right) // 2
        if arr[mid] == target:
            return mid
        elif arr[mid] < target:
            left = mid + 1
        else:
            right = mid - 1
    return -1

def binary_search_leftmost(arr: List[int], target: int) -> int:
    """查找左边界（第一个等于 target 的位置）"""
    left, right = 0, len(arr)
    while left < right:
        mid = (left + right) // 2
        if arr[mid] < target:
            left = mid + 1
        else:
            right = mid
    return left if left < len(arr) and arr[left] == target else -1

# 使用 bisect 标准库
sorted_list = [1, 3, 3, 5, 7, 9]
print(bisect.bisect_left(sorted_list, 3))   # 1（左侧插入位置）
print(bisect.bisect_right(sorted_list, 3))  # 3（右侧插入位置）
bisect.insort(sorted_list, 4)  # 插入并保持有序

图算法

from collections import defaultdict, deque
from typing import Dict, List, Set, Optional
import heapq

class Graph:
    """图的常用算法"""
    def __init__(self):
        self.adj: Dict[str, List[tuple]] = defaultdict(list)

    def add_edge(self, u: str, v: str, weight: int = 1, directed: bool = False):
        self.adj[u].append((v, weight))
        if not directed:
            self.adj[v].append((u, weight))

    def bfs(self, start: str) -> List[str]:
        """广度优先搜索"""
        visited = set([start])
        queue = deque([start])
        order = []
        while queue:
            node = queue.popleft()
            order.append(node)
            for neighbor, _ in self.adj[node]:
                if neighbor not in visited:
                    visited.add(neighbor)
                    queue.append(neighbor)
        return order

    def dfs(self, start: str) -> List[str]:
        """深度优先搜索"""
        visited = set()
        order = []

        def _dfs(node: str):
            visited.add(node)
            order.append(node)
            for neighbor, _ in self.adj[node]:
                if neighbor not in visited:
                    _dfs(neighbor)

        _dfs(start)
        return order

    def dijkstra(self, start: str) -> Dict[str, int]:
        """Dijkstra 最短路径"""
        distances = {node: float("inf") for node in self.adj}
        distances[start] = 0
        pq = [(0, start)]

        while pq:
            dist, node = heapq.heappop(pq)
            if dist > distances[node]:
                continue
            for neighbor, weight in self.adj[node]:
                new_dist = dist + weight
                if new_dist < distances[neighbor]:
                    distances[neighbor] = new_dist
                    heapq.heappush(pq, (new_dist, neighbor))

        return distances

    def has_cycle(self) -> bool:
        """检测环（有向图）"""
        WHITE, GRAY, BLACK = 0, 1, 2
        color = {node: WHITE for node in self.adj}

        def dfs(node: str) -> bool:
            color[node] = GRAY
            for neighbor, _ in self.adj[node]:
                if color[neighbor] == GRAY:
                    return True
                if color[neighbor] == WHITE and dfs(neighbor):
                    return True
            color[node] = BLACK
            return False

        return any(dfs(node) for node in self.adj if color[node] == WHITE)

# 使用
g = Graph()
g.add_edge("A", "B", 4)
g.add_edge("A", "C", 2)
g.add_edge("B", "C", 1)
g.add_edge("B", "D", 5)
g.add_edge("C", "D", 8)

print(f"BFS: {g.bfs('A')}")         # BFS: ['A', 'B', 'C', 'D']
print(f"DFS: {g.dfs('A')}")         # DFS: ['A', 'B', 'C', 'D']
print(f"最短路径: {g.dijkstra('A')}") # {'A': 0, 'B': 4, 'C': 2, 'D': 9}

动态规划与常用算法模式

# 1. 动态规划：最长递增子序列
def length_of_lis(nums: List[int]) -> int:
    """O(n log n) 解法"""
    tails = []
    for num in nums:
        idx = bisect.bisect_left(tails, num)
        if idx == len(tails):
            tails.append(num)
        else:
            tails[idx] = num
    return len(tails)

# 2. 动态规划：背包问题
def knapsack(weights: List[int], values: List[int], capacity: int) -> int:
    """0-1 背包问题"""
    n = len(weights)
    dp = [0] * (capacity + 1)
    for i in range(n):
        for w in range(capacity, weights[i] - 1, -1):
            dp[w] = max(dp[w], dp[w - weights[i]] + values[i])
    return dp[capacity]

# 3. 前缀和
class PrefixSum:
    """前缀和（区间查询 O(1)）"""
    def __init__(self, arr: List[int]):
        self.prefix = [0]
        for num in arr:
            self.prefix.append(self.prefix[-1] + num)

    def range_sum(self, left: int, right: int) -> int:
        """查询区间 [left, right] 的和"""
        return self.prefix[right + 1] - self.prefix[left]

# 4. 滑动窗口
def max_window_sum(arr: List[int], k: int) -> int:
    """滑动窗口最大和"""
    if len(arr) < k:
        return sum(arr)
    window_sum = sum(arr[:k])
    max_sum = window_sum
    for i in range(k, len(arr)):
        window_sum += arr[i] - arr[i - k]
        max_sum = max(max_sum, window_sum)
    return max_sum

# 5. 并查集
class UnionFind:
    """并查集"""
    def __init__(self, n: int):
        self.parent = list(range(n))
        self.rank = [0] * n
        self.count = n  # 连通分量数

    def find(self, x: int) -> int:
        if self.parent[x] != x:
            self.parent[x] = self.find(self.parent[x])  # 路径压缩
        return self.parent[x]

    def union(self, x: int, y: int) -> bool:
        px, py = self.find(x), self.find(y)
        if px == py:
            return False
        if self.rank[px] < self.rank[py]:
            px, py = py, px
        self.parent[py] = px
        if self.rank[px] == self.rank[py]:
            self.rank[px] += 1
        self.count -= 1
        return True

    def connected(self, x: int, y: int) -> bool:
        return self.find(x) == self.find(y)

# 使用示例
print(f"LIS 长度: {length_of_lis([10, 9, 2, 5, 3, 7, 101, 18])}")  # 4
print(f"背包最大值: {knapsack([1, 3, 4, 5], [1, 4, 5, 7], 7)}")    # 9
ps = PrefixSum([1, 2, 3, 4, 5])
print(f"区间 [1,3] 和: {ps.range_sum(1, 3)}")  # 9
print(f"窗口最大和: {max_window_sum([2, 1, 5, 1, 3, 2], 3)}")     # 9

uf = UnionFind(5)
uf.union(0, 1)
uf.union(2, 3)
print(f"连通分量: {uf.count}")  # 3
print(f"0-1 连通: {uf.connected(0, 1)}")  # True
print(f"0-2 连通: {uf.connected(0, 2)}")  # False

高级数据结构：Trie 字典树与单调栈

# 1. Trie 字典树
class TrieNode:
    def __init__(self):
        self.children: dict[str, TrieNode] = {}
        self.is_end = False
        self.count = 0  # 经过此节点的单词数

class Trie:
    """字典树 — 高效字符串检索"""
    def __init__(self):
        self.root = TrieNode()

    def insert(self, word: str) -> None:
        node = self.root
        for ch in word:
            if ch not in node.children:
                node.children[ch] = TrieNode()
            node = node.children[ch]
            node.count += 1
        node.is_end = True

    def search(self, word: str) -> bool:
        """完全匹配"""
        node = self._find(word)
        return node is not None and node.is_end

    def starts_with(self, prefix: str) -> bool:
        """前缀搜索"""
        return self._find(prefix) is not None

    def count_prefix(self, prefix: str) -> int:
        """统计以 prefix 为前缀的单词数"""
        node = self._find(prefix)
        return node.count if node else 0

    def auto_complete(self, prefix: str, limit: int = 10) -> List[str]:
        """自动补全建议"""
        node = self._find(prefix)
        if not node:
            return []
        results = []
        self._dfs(node, prefix, results, limit)
        return results

    def _find(self, prefix: str) -> Optional[TrieNode]:
        node = self.root
        for ch in prefix:
            if ch not in node.children:
                return None
            node = node.children[ch]
        return node

    def _dfs(self, node: TrieNode, path: str, results: List[str], limit: int):
        if len(results) >= limit:
            return
        if node.is_end:
            results.append(path)
        for ch, child in node.children.items():
            self._dfs(child, path + ch, results, limit)

# 使用 Trie
trie = Trie()
for word in ["apple", "application", "apply", "app", "banana", "band"]:
    trie.insert(word)

print(trie.search("apple"))              # True
print(trie.starts_with("app"))           # True
print(trie.count_prefix("app"))          # 4
print(trie.auto_complete("app", 3))      # ['app', 'apple', 'application']

# 2. 单调栈 — 下一个更大元素
def next_greater_element(nums: List[int]) -> List[int]:
    """O(n) 解法 — 单调递减栈"""
    n = len(nums)
    result = [-1] * n
    stack = []  # 存下标

    for i in range(n):
        while stack and nums[i] > nums[stack[-1]]:
            idx = stack.pop()
            result[idx] = nums[i]
        stack.append(i)

    return result

print(next_greater_element([2, 1, 2, 4, 3]))  # [4, 2, 4, -1, -1]

# 3. 单调栈 — 接雨水问题
def trap_rain_water(height: List[int]) -> int:
    """接雨水 — 单调栈 O(n)"""
    stack = []
    water = 0

    for i in range(len(height)):
        while stack and height[i] > height[stack[-1]]:
            bottom = stack.pop()
            if not stack:
                break
            left = stack[-1]
            width = i - left - 1
            h = min(height[left], height[i]) - height[bottom]
            water += width * h
        stack.append(i)

    return water

print(trap_rain_water([0, 1, 0, 2, 1, 0, 1, 3, 2, 1, 2, 1]))  # 6

常用算法模式：双指针与回溯

# 1. 双指针 — 有序数组两数之和
def two_sum_sorted(nums: List[int], target: int) -> List[int]:
    """有序数组中找两个数等于目标值"""
    left, right = 0, len(nums) - 1
    while left < right:
        s = nums[left] + nums[right]
        if s == target:
            return [left, right]
        elif s < target:
            left += 1
        else:
            right -= 1
    return []

# 2. 双指针 — 三数之和
def three_sum(nums: List[int]) -> List[List[int]]:
    """三数之和为 0 的所有组合"""
    nums.sort()
    result = []
    n = len(nums)

    for i in range(n - 2):
        if i > 0 and nums[i] == nums[i - 1]:
            continue  # 去重
        left, right = i + 1, n - 1
        while left < right:
            s = nums[i] + nums[left] + nums[right]
            if s == 0:
                result.append([nums[i], nums[left], nums[right]])
                left += 1
                right -= 1
                while left < right and nums[left] == nums[left - 1]:
                    left += 1  # 去重
                while left < right and nums[right] == nums[right + 1]:
                    right -= 1  # 去重
            elif s < 0:
                left += 1
            else:
                right -= 1
    return result

# 3. 回溯 — 全排列
def permute(nums: List[int]) -> List[List[int]]:
    """全排列 — 回溯法"""
    result = []
    n = len(nums)

    def backtrack(path: List[int], used: set):
        if len(path) == n:
            result.append(path[:])
            return
        for num in nums:
            if num in used:
                continue
            path.append(num)
            used.add(num)
            backtrack(path, used)
            path.pop()
            used.remove(num)

    backtrack([], set())
    return result

# 4. 回溯 — 组合总和
def combination_sum(candidates: List[int], target: int) -> List[List[int]]:
    """组合总和 — 可重复选取"""
    result = []

    def backtrack(start: int, path: List[int], remaining: int):
        if remaining == 0:
            result.append(path[:])
            return
        if remaining < 0:
            return
        for i in range(start, len(candidates)):
            path.append(candidates[i])
            backtrack(i, path, remaining - candidates[i])  # i 而非 i+1，允许重复
            path.pop()

    backtrack(0, [], target)
    return result

print(three_sum([-1, 0, 1, 2, -1, -4]))  # [[-1, -1, 2], [-1, 0, 1]]
print(permute([1, 2, 3]))  # 6 种排列
print(combination_sum([2, 3, 6, 7], 7))  # [[2, 2, 3], [7]]