Python 算法与数据结构
大约 11 分钟约 3443 字
Python 算法与数据结构
简介
算法与数据结构是编程的基础能力,直接影响代码的执行效率和可扩展性。Python 标准库提供了丰富的内置数据结构(list、dict、set、deque 等),同时可以自定义实现树、图、堆等高级结构。掌握常用算法模式和数据结构选择,是写出高效 Python 代码的前提。
特点
实现
核心数据结构实现
from typing import Any, Optional
from collections import deque
import heapq
# 1. 链表
class ListNode:
def __init__(self, val: Any, next_node=None):
self.val = val
self.next = next_node
class LinkedList:
"""单链表"""
def __init__(self):
self.head: Optional[ListNode] = None
self.size = 0
def append(self, val: Any) -> None:
new_node = ListNode(val)
if not self.head:
self.head = new_node
else:
current = self.head
while current.next:
current = current.next
current.next = new_node
self.size += 1
def prepend(self, val: Any) -> None:
self.head = ListNode(val, self.head)
self.size += 1
def delete(self, val: Any) -> bool:
if not self.head:
return False
if self.head.val == val:
self.head = self.head.next
self.size -= 1
return True
current = self.head
while current.next:
if current.next.val == val:
current.next = current.next.next
self.size -= 1
return True
current = current.next
return False
def reverse(self) -> None:
prev, current = None, self.head
while current:
next_node = current.next
current.next = prev
prev = current
current = next_node
self.head = prev
def to_list(self) -> list:
result = []
current = self.head
while current:
result.append(current.val)
current = current.next
return result
# 2. 二叉搜索树
class TreeNode:
def __init__(self, val: int):
self.val = val
self.left: Optional["TreeNode"] = None
self.right: Optional["TreeNode"] = None
class BST:
"""二叉搜索树"""
def __init__(self):
self.root: Optional[TreeNode] = None
def insert(self, val: int) -> None:
self.root = self._insert(self.root, val)
def _insert(self, node: Optional[TreeNode], val: int) -> TreeNode:
if not node:
return TreeNode(val)
if val < node.val:
node.left = self._insert(node.left, val)
elif val > node.val:
node.right = self._insert(node.right, val)
return node
def search(self, val: int) -> bool:
return self._search(self.root, val)
def _search(self, node: Optional[TreeNode], val: int) -> bool:
if not node:
return False
if val == node.val:
return True
return self._search(node.left if val < node.val else node.right, val)
def inorder(self) -> list[int]:
"""中序遍历(有序输出)"""
result = []
self._inorder(self.root, result)
return result
def _inorder(self, node: Optional[TreeNode], result: list) -> None:
if node:
self._inorder(node.left, result)
result.append(node.val)
self._inorder(node.right, result)
# 3. LRU 缓存
class LRUCache:
"""LRU 缓存(Python 标准库实现)"""
def __init__(self, capacity: int):
from collections import OrderedDict
self.cache = OrderedDict()
self.capacity = capacity
def get(self, key: int) -> int:
if key not in self.cache:
return -1
self.cache.move_to_end(key)
return self.cache[key]
def put(self, key: int, value: int) -> None:
if key in self.cache:
self.cache.move_to_end(key)
self.cache[key] = value
if len(self.cache) > self.capacity:
self.cache.popitem(last=False)
# 使用
lru = LRUCache(3)
lru.put(1, 100)
lru.put(2, 200)
lru.put(3, 300)
lru.get(1) # 100,key 1 变为最近使用
lru.put(4, 400) # 淘汰 key 2(最久未使用)
print(lru.get(2)) # -1(已淘汰)排序与搜索算法
from typing import List, Any
import bisect
# 1. 排序算法
def merge_sort(arr: List[int]) -> List[int]:
"""归并排序 - O(n log n)"""
if len(arr) <= 1:
return arr
mid = len(arr) // 2
left = merge_sort(arr[:mid])
right = merge_sort(arr[mid:])
return _merge(left, right)
def _merge(left: List[int], right: List[int]) -> List[int]:
result = []
i = j = 0
while i < len(left) and j < len(right):
if left[i] <= right[j]:
result.append(left[i])
i += 1
else:
result.append(right[j])
j += 1
result.extend(left[i:])
result.extend(right[j:])
return result
def quick_sort(arr: List[int]) -> List[int]:
"""快速排序 - O(n log n) 平均"""
if len(arr) <= 1:
return arr
pivot = arr[len(arr) // 2]
left = [x for x in arr if x < pivot]
middle = [x for x in arr if x == pivot]
right = [x for x in arr if x > pivot]
return quick_sort(left) + middle + quick_sort(right)
# 2. 搜索算法
def binary_search(arr: List[int], target: int) -> int:
"""二分查找 - O(log n)"""
left, right = 0, len(arr) - 1
while left <= right:
mid = (left + right) // 2
if arr[mid] == target:
return mid
elif arr[mid] < target:
left = mid + 1
else:
right = mid - 1
return -1
def binary_search_leftmost(arr: List[int], target: int) -> int:
"""查找左边界(第一个等于 target 的位置)"""
left, right = 0, len(arr)
while left < right:
mid = (left + right) // 2
if arr[mid] < target:
left = mid + 1
else:
right = mid
return left if left < len(arr) and arr[left] == target else -1
# 使用 bisect 标准库
sorted_list = [1, 3, 3, 5, 7, 9]
print(bisect.bisect_left(sorted_list, 3)) # 1(左侧插入位置)
print(bisect.bisect_right(sorted_list, 3)) # 3(右侧插入位置)
bisect.insort(sorted_list, 4) # 插入并保持有序图算法
from collections import defaultdict, deque
from typing import Dict, List, Set, Optional
import heapq
class Graph:
"""图的常用算法"""
def __init__(self):
self.adj: Dict[str, List[tuple]] = defaultdict(list)
def add_edge(self, u: str, v: str, weight: int = 1, directed: bool = False):
self.adj[u].append((v, weight))
if not directed:
self.adj[v].append((u, weight))
def bfs(self, start: str) -> List[str]:
"""广度优先搜索"""
visited = set([start])
queue = deque([start])
order = []
while queue:
node = queue.popleft()
order.append(node)
for neighbor, _ in self.adj[node]:
if neighbor not in visited:
visited.add(neighbor)
queue.append(neighbor)
return order
def dfs(self, start: str) -> List[str]:
"""深度优先搜索"""
visited = set()
order = []
def _dfs(node: str):
visited.add(node)
order.append(node)
for neighbor, _ in self.adj[node]:
if neighbor not in visited:
_dfs(neighbor)
_dfs(start)
return order
def dijkstra(self, start: str) -> Dict[str, int]:
"""Dijkstra 最短路径"""
distances = {node: float("inf") for node in self.adj}
distances[start] = 0
pq = [(0, start)]
while pq:
dist, node = heapq.heappop(pq)
if dist > distances[node]:
continue
for neighbor, weight in self.adj[node]:
new_dist = dist + weight
if new_dist < distances[neighbor]:
distances[neighbor] = new_dist
heapq.heappush(pq, (new_dist, neighbor))
return distances
def has_cycle(self) -> bool:
"""检测环(有向图)"""
WHITE, GRAY, BLACK = 0, 1, 2
color = {node: WHITE for node in self.adj}
def dfs(node: str) -> bool:
color[node] = GRAY
for neighbor, _ in self.adj[node]:
if color[neighbor] == GRAY:
return True
if color[neighbor] == WHITE and dfs(neighbor):
return True
color[node] = BLACK
return False
return any(dfs(node) for node in self.adj if color[node] == WHITE)
# 使用
g = Graph()
g.add_edge("A", "B", 4)
g.add_edge("A", "C", 2)
g.add_edge("B", "C", 1)
g.add_edge("B", "D", 5)
g.add_edge("C", "D", 8)
print(f"BFS: {g.bfs('A')}") # BFS: ['A', 'B', 'C', 'D']
print(f"DFS: {g.dfs('A')}") # DFS: ['A', 'B', 'C', 'D']
print(f"最短路径: {g.dijkstra('A')}") # {'A': 0, 'B': 4, 'C': 2, 'D': 9}动态规划与常用算法模式
# 1. 动态规划:最长递增子序列
def length_of_lis(nums: List[int]) -> int:
"""O(n log n) 解法"""
tails = []
for num in nums:
idx = bisect.bisect_left(tails, num)
if idx == len(tails):
tails.append(num)
else:
tails[idx] = num
return len(tails)
# 2. 动态规划:背包问题
def knapsack(weights: List[int], values: List[int], capacity: int) -> int:
"""0-1 背包问题"""
n = len(weights)
dp = [0] * (capacity + 1)
for i in range(n):
for w in range(capacity, weights[i] - 1, -1):
dp[w] = max(dp[w], dp[w - weights[i]] + values[i])
return dp[capacity]
# 3. 前缀和
class PrefixSum:
"""前缀和(区间查询 O(1))"""
def __init__(self, arr: List[int]):
self.prefix = [0]
for num in arr:
self.prefix.append(self.prefix[-1] + num)
def range_sum(self, left: int, right: int) -> int:
"""查询区间 [left, right] 的和"""
return self.prefix[right + 1] - self.prefix[left]
# 4. 滑动窗口
def max_window_sum(arr: List[int], k: int) -> int:
"""滑动窗口最大和"""
if len(arr) < k:
return sum(arr)
window_sum = sum(arr[:k])
max_sum = window_sum
for i in range(k, len(arr)):
window_sum += arr[i] - arr[i - k]
max_sum = max(max_sum, window_sum)
return max_sum
# 5. 并查集
class UnionFind:
"""并查集"""
def __init__(self, n: int):
self.parent = list(range(n))
self.rank = [0] * n
self.count = n # 连通分量数
def find(self, x: int) -> int:
if self.parent[x] != x:
self.parent[x] = self.find(self.parent[x]) # 路径压缩
return self.parent[x]
def union(self, x: int, y: int) -> bool:
px, py = self.find(x), self.find(y)
if px == py:
return False
if self.rank[px] < self.rank[py]:
px, py = py, px
self.parent[py] = px
if self.rank[px] == self.rank[py]:
self.rank[px] += 1
self.count -= 1
return True
def connected(self, x: int, y: int) -> bool:
return self.find(x) == self.find(y)
# 使用示例
print(f"LIS 长度: {length_of_lis([10, 9, 2, 5, 3, 7, 101, 18])}") # 4
print(f"背包最大值: {knapsack([1, 3, 4, 5], [1, 4, 5, 7], 7)}") # 9
ps = PrefixSum([1, 2, 3, 4, 5])
print(f"区间 [1,3] 和: {ps.range_sum(1, 3)}") # 9
print(f"窗口最大和: {max_window_sum([2, 1, 5, 1, 3, 2], 3)}") # 9
uf = UnionFind(5)
uf.union(0, 1)
uf.union(2, 3)
print(f"连通分量: {uf.count}") # 3
print(f"0-1 连通: {uf.connected(0, 1)}") # True
print(f"0-2 连通: {uf.connected(0, 2)}") # False高级数据结构:Trie 字典树与单调栈
# 1. Trie 字典树
class TrieNode:
def __init__(self):
self.children: dict[str, TrieNode] = {}
self.is_end = False
self.count = 0 # 经过此节点的单词数
class Trie:
"""字典树 — 高效字符串检索"""
def __init__(self):
self.root = TrieNode()
def insert(self, word: str) -> None:
node = self.root
for ch in word:
if ch not in node.children:
node.children[ch] = TrieNode()
node = node.children[ch]
node.count += 1
node.is_end = True
def search(self, word: str) -> bool:
"""完全匹配"""
node = self._find(word)
return node is not None and node.is_end
def starts_with(self, prefix: str) -> bool:
"""前缀搜索"""
return self._find(prefix) is not None
def count_prefix(self, prefix: str) -> int:
"""统计以 prefix 为前缀的单词数"""
node = self._find(prefix)
return node.count if node else 0
def auto_complete(self, prefix: str, limit: int = 10) -> List[str]:
"""自动补全建议"""
node = self._find(prefix)
if not node:
return []
results = []
self._dfs(node, prefix, results, limit)
return results
def _find(self, prefix: str) -> Optional[TrieNode]:
node = self.root
for ch in prefix:
if ch not in node.children:
return None
node = node.children[ch]
return node
def _dfs(self, node: TrieNode, path: str, results: List[str], limit: int):
if len(results) >= limit:
return
if node.is_end:
results.append(path)
for ch, child in node.children.items():
self._dfs(child, path + ch, results, limit)
# 使用 Trie
trie = Trie()
for word in ["apple", "application", "apply", "app", "banana", "band"]:
trie.insert(word)
print(trie.search("apple")) # True
print(trie.starts_with("app")) # True
print(trie.count_prefix("app")) # 4
print(trie.auto_complete("app", 3)) # ['app', 'apple', 'application']
# 2. 单调栈 — 下一个更大元素
def next_greater_element(nums: List[int]) -> List[int]:
"""O(n) 解法 — 单调递减栈"""
n = len(nums)
result = [-1] * n
stack = [] # 存下标
for i in range(n):
while stack and nums[i] > nums[stack[-1]]:
idx = stack.pop()
result[idx] = nums[i]
stack.append(i)
return result
print(next_greater_element([2, 1, 2, 4, 3])) # [4, 2, 4, -1, -1]
# 3. 单调栈 — 接雨水问题
def trap_rain_water(height: List[int]) -> int:
"""接雨水 — 单调栈 O(n)"""
stack = []
water = 0
for i in range(len(height)):
while stack and height[i] > height[stack[-1]]:
bottom = stack.pop()
if not stack:
break
left = stack[-1]
width = i - left - 1
h = min(height[left], height[i]) - height[bottom]
water += width * h
stack.append(i)
return water
print(trap_rain_water([0, 1, 0, 2, 1, 0, 1, 3, 2, 1, 2, 1])) # 6常用算法模式:双指针与回溯
# 1. 双指针 — 有序数组两数之和
def two_sum_sorted(nums: List[int], target: int) -> List[int]:
"""有序数组中找两个数等于目标值"""
left, right = 0, len(nums) - 1
while left < right:
s = nums[left] + nums[right]
if s == target:
return [left, right]
elif s < target:
left += 1
else:
right -= 1
return []
# 2. 双指针 — 三数之和
def three_sum(nums: List[int]) -> List[List[int]]:
"""三数之和为 0 的所有组合"""
nums.sort()
result = []
n = len(nums)
for i in range(n - 2):
if i > 0 and nums[i] == nums[i - 1]:
continue # 去重
left, right = i + 1, n - 1
while left < right:
s = nums[i] + nums[left] + nums[right]
if s == 0:
result.append([nums[i], nums[left], nums[right]])
left += 1
right -= 1
while left < right and nums[left] == nums[left - 1]:
left += 1 # 去重
while left < right and nums[right] == nums[right + 1]:
right -= 1 # 去重
elif s < 0:
left += 1
else:
right -= 1
return result
# 3. 回溯 — 全排列
def permute(nums: List[int]) -> List[List[int]]:
"""全排列 — 回溯法"""
result = []
n = len(nums)
def backtrack(path: List[int], used: set):
if len(path) == n:
result.append(path[:])
return
for num in nums:
if num in used:
continue
path.append(num)
used.add(num)
backtrack(path, used)
path.pop()
used.remove(num)
backtrack([], set())
return result
# 4. 回溯 — 组合总和
def combination_sum(candidates: List[int], target: int) -> List[List[int]]:
"""组合总和 — 可重复选取"""
result = []
def backtrack(start: int, path: List[int], remaining: int):
if remaining == 0:
result.append(path[:])
return
if remaining < 0:
return
for i in range(start, len(candidates)):
path.append(candidates[i])
backtrack(i, path, remaining - candidates[i]) # i 而非 i+1,允许重复
path.pop()
backtrack(0, [], target)
return result
print(three_sum([-1, 0, 1, 2, -1, -4])) # [[-1, -1, 2], [-1, 0, 1]]
print(permute([1, 2, 3])) # 6 种排列
print(combination_sum([2, 3, 6, 7], 7)) # [[2, 2, 3], [7]]优点
缺点
总结
算法与数据结构是编程能力的基石。Python 虽然运行速度不如编译型语言,但凭借简洁的语法和强大的标准库,是算法学习和快速验证的最佳选择。掌握链表、树、图等基础结构的实现,以及排序、搜索、动态规划等核心算法模式,能够显著提升解决实际工程问题的能力。
关键知识点
- Python dict 基于哈希表,查找/插入 O(1);list 基于动态数组,尾部操作 O(1),中间插入 O(n)
- collections.deque 是双端队列,首尾操作都是 O(1),适合 BFS 和滑动窗口
- heapq 模块实现最小堆,heappush/heappop 都是 O(log n)
- bisect 模块在有序列表上实现 O(log n) 的二分查找
项目落地视角
- 选择数据结构时优先使用 Python 内置结构(dict、set、deque)
- 性能关键路径用 collections.namedtuple 或 slots 减少内存开销
- 大数据处理用生成器和 itertools 替代列表,降低内存占用
- 时间复杂度分析帮助预估系统在数据量增长时的表现
常见误区
- 在循环中用 list.index() 或 in list 查找,应该用 set 或 dict
- 忽略 Python 的 list 中间插入 O(n) 开销,高频场景应换用 deque
- 过度追求算法极致优化,忽视代码可读性和实际数据规模
- 递归深度超限(RecursionError),应改用栈或迭代方式
进阶路线
- 系统学习 LeetCode 常见算法模式(滑动窗口、双指针、单调栈等)
- 研究 Python 标准库中数据结构的 C 实现原理
- 了解 numba 和 Cython 加速 Python 算法的方法
- 探索分布式算法和大数据处理框架(Spark、Ray)
适用场景
- 技术面试中考察编程能力和问题解决思路
- 需要选择合适数据结构优化业务代码性能
- 数据处理和计算密集型任务的算法选型
落地建议
- 项目中统一使用 dict/set 进行快速查找,避免 O(n) 的线性搜索
- 用 deque 替代 list 实现队列操作(首部插入/删除)
- 关键路径的性能敏感代码使用 timeit 或 cProfile 验证
排错清单
- 检查是否存在 O(n^2) 的嵌套循环,可用 dict/set 优化为 O(n)
- 确认递归深度是否超过 Python 默认限制(sys.setrecursionlimit)
- 排查内存占用是否因大量临时列表导致,考虑使用生成器
复盘问题
- 你的项目中哪些操作的时间复杂度是 O(n^2)?能否优化为 O(n log n) 或 O(n)?
- 选择数据结构时是否考虑了数据规模的增长趋势?
- 团队成员是否能正确分析代码的时间复杂度和空间复杂度?
