数据结构进阶的重要性
Python提供了丰富的高级数据结构,这些数据结构在标准库中经过优化,能够高效处理各种复杂的数据操作。掌握这些高级数据结构对于编写高效、可维护的Python代码至关重要。
collections模块
1. defaultdict - 默认字典
from collections import defaultdict, Counter, deque, namedtuple, OrderedDict
from datetime import datetime
def defaultdict_examples():
"""defaultdict示例"""
print("=== defaultdict示例 ===")
# 基本用法
dd = defaultdict(list)
dd['fruits'].append('apple')
dd['fruits'].append('banana')
dd['vegetables'].append('carrot')
print(f"defaultdict: {dict(dd)}")
# 计数器
word_count = defaultdict(int)
text = "hello world python programming hello world"
for word in text.split():
word_count[word] += 1
print(f"单词计数: {dict(word_count)}")
# 嵌套字典
nested_dd = defaultdict(lambda: defaultdict(list))
nested_dd['group1']['items'].append('item1')
nested_dd['group1']['items'].append('item2')
nested_dd['group2']['items'].append('item3')
print(f"嵌套字典: {dict(nested_dd)}")
# 日期分组
dates = ['2018-11-20', '2018-11-21', '2018-11-20', '2018-11-22']
date_groups = defaultdict(list)
for date in dates:
date_groups[date].append(f"event_{len(date_groups[date]) + 1}")
print(f"日期分组: {dict(date_groups)}")
defaultdict_examples()
2. Counter - 计数器
def counter_examples():
"""Counter示例"""
print("=== Counter示例 ===")
# 基本计数
text = "hello world python programming"
char_counter = Counter(text)
print(f"字符计数: {char_counter}")
print(f"最常见的5个字符: {char_counter.most_common(5)}")
# 单词计数
words = text.split()
word_counter = Counter(words)
print(f"单词计数: {word_counter}")
# 数学运算
counter1 = Counter(['a', 'b', 'c', 'a', 'b'])
counter2 = Counter(['a', 'b', 'd'])
print(f"Counter1: {counter1}")
print(f"Counter2: {counter2}")
print(f"相加: {counter1 + counter2}")
print(f"相减: {counter1 - counter2}")
print(f"交集: {counter1 & counter2}")
print(f"并集: {counter1 | counter2}")
# 实际应用:投票统计
votes = ['张三', '李四', '王五', '张三', '李四', '张三', '赵六']
vote_counter = Counter(votes)
winner = vote_counter.most_common(1)[0]
print(f"投票结果: {dict(vote_counter)}")
print(f"获胜者: {winner[0]} (得票: {winner[1]})")
counter_examples()
3. deque - 双端队列
def deque_examples():
"""deque示例"""
print("=== deque示例 ===")
# 基本操作
dq = deque([1, 2, 3, 4, 5])
print(f"初始deque: {dq}")
# 两端操作
dq.appendleft(0) # 左端添加
dq.append(6) # 右端添加
print(f"添加后: {dq}")
# 弹出操作
left_item = dq.popleft()
right_item = dq.pop()
print(f"左端弹出: {left_item}, 右端弹出: {right_item}")
print(f"弹出后: {dq}")
# 旋转操作
dq.rotate(2) # 向右旋转2位
print(f"旋转后: {dq}")
# 实际应用:滑动窗口
def sliding_window_max(nums, k):
"""滑动窗口最大值"""
dq = deque()
result = []
for i, num in enumerate(nums):
# 移除超出窗口的元素
while dq and dq[0] <= i - k:
dq.popleft()
# 移除比当前元素小的元素
while dq and nums[dq[-1]] < num:
dq.pop()
dq.append(i)
if i >= k - 1:
result.append(nums[dq[0]])
return result
nums = [1, 3, -1, -3, 5, 3, 6, 7]
k = 3
result = sliding_window_max(nums, k)
print(f"滑动窗口最大值: {result}")
deque_examples()
4. namedtuple - 命名元组
def namedtuple_examples():
"""namedtuple示例"""
print("=== namedtuple示例 ===")
# 基本用法
Point = namedtuple('Point', ['x', 'y'])
p1 = Point(1, 2)
p2 = Point(3, 4)
print(f"点1: {p1}, x坐标: {p1.x}, y坐标: {p1.y}")
print(f"点2: {p2}")
# 计算距离
distance = ((p2.x - p1.x)**2 + (p2.y - p1.y)**2)**0.5
print(f"两点距离: {distance:.2f}")
# 学生信息
Student = namedtuple('Student', ['name', 'age', 'grade', 'department'])
students = [
Student('张三', 20, 85, '计算机'),
Student('李四', 19, 92, '数学'),
Student('王五', 21, 78, '物理'),
]
print("学生信息:")
for student in students:
print(f" {student.name}: {student.age}岁, {student.grade}分, {student.department}系")
# 统计信息
avg_grade = sum(s.grade for s in students) / len(students)
print(f"平均成绩: {avg_grade:.1f}")
namedtuple_examples()
heapq模块 - 堆操作
1. 基本堆操作
import heapq
def heapq_examples():
"""heapq示例"""
print("=== heapq示例 ===")
# 基本堆操作
heap = [3, 1, 4, 1, 5, 9, 2, 6]
heapq.heapify(heap)
print(f"堆化后: {heap}")
# 弹出最小元素
min_item = heapq.heappop(heap)
print(f"最小元素: {min_item}")
print(f"弹出后: {heap}")
# 添加元素
heapq.heappush(heap, 0)
print(f"添加0后: {heap}")
# 获取最小元素(不弹出)
min_item = heap[0]
print(f"当前最小元素: {min_item}")
# 合并多个堆
heap1 = [1, 3, 5]
heap2 = [2, 4, 6]
merged = list(heapq.merge(heap1, heap2))
print(f"合并堆: {merged}")
heapq_examples()
2. 高级堆应用
def advanced_heap_examples():
"""高级堆应用"""
print("=== 高级堆应用 ===")
# 最大堆(通过负数实现)
max_heap = []
numbers = [3, 1, 4, 1, 5, 9, 2, 6]
for num in numbers:
heapq.heappush(max_heap, -num) # 存储负数
print("最大堆(负数实现):")
while max_heap:
max_item = -heapq.heappop(max_heap) # 取负数得到最大值
print(f" {max_item}")
# 前K个最大元素
def top_k_largest(nums, k):
"""返回前K个最大元素"""
return heapq.nlargest(k, nums)
def top_k_smallest(nums, k):
"""返回前K个最小元素"""
return heapq.nsmallest(k, nums)
numbers = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5]
print(f"原数组: {numbers}")
print(f"前3个最大元素: {top_k_largest(numbers, 3)}")
print(f"前3个最小元素: {top_k_smallest(numbers, 3)}")
# 堆排序
def heap_sort(nums):
"""堆排序"""
heap = nums.copy()
heapq.heapify(heap)
return [heapq.heappop(heap) for _ in range(len(heap))]
sorted_nums = heap_sort(numbers)
print(f"堆排序结果: {sorted_nums}")
advanced_heap_examples()
bisect模块 - 二分查找
1. 基本二分查找
import bisect
def bisect_examples():
"""bisect示例"""
print("=== bisect示例 ===")
# 有序列表
sorted_list = [1, 3, 5, 7, 9, 11, 13, 15]
print(f"有序列表: {sorted_list}")
# 查找插入位置
target = 6
insert_pos = bisect.bisect_left(sorted_list, target)
print(f"插入位置(左): {insert_pos}")
insert_pos = bisect.bisect_right(sorted_list, target)
print(f"插入位置(右): {insert_pos}")
# 插入元素
bisect.insort_left(sorted_list, 6)
print(f"插入6后: {sorted_list}")
# 查找元素
target = 7
index = bisect.bisect_left(sorted_list, target)
if index < len(sorted_list) and sorted_list[index] == target:
print(f"找到元素{target},位置: {index}")
else:
print(f"未找到元素{target}")
bisect_examples()
2. 高级二分查找应用
def advanced_bisect_examples():
"""高级二分查找应用"""
print("=== 高级二分查找应用 ===")
# 查找范围
def find_range(nums, target):
"""查找目标值的范围"""
left = bisect.bisect_left(nums, target)
right = bisect.bisect_right(nums, target)
return [left, right - 1] if left < right else [-1, -1]
nums = [1, 2, 2, 2, 3, 4, 4, 5]
target = 2
range_result = find_range(nums, target)
print(f"数组: {nums}")
print(f"目标值{target}的范围: {range_result}")
# 插入排序
def insertion_sort(nums):
"""使用bisect的插入排序"""
result = []
for num in nums:
bisect.insort_left(result, num)
return result
unsorted = [3, 1, 4, 1, 5, 9, 2, 6]
sorted_result = insertion_sort(unsorted)
print(f"原数组: {unsorted}")
print(f"插入排序: {sorted_result}")
# 查找最接近的元素
def find_closest(nums, target):
"""查找最接近的元素"""
pos = bisect.bisect_left(nums, target)
if pos == 0:
return nums[0]
if pos == len(nums):
return nums[-1]
if target - nums[pos - 1] <= nums[pos] - target:
return nums[pos - 1]
return nums[pos]
nums = [1, 3, 5, 7, 9, 11, 13, 15]
target = 6
closest = find_closest(nums, target)
print(f"最接近{target}的元素: {closest}")
advanced_bisect_examples()
实际应用案例
1. 任务调度系统
def task_scheduler_example():
"""任务调度系统示例"""
print("=== 任务调度系统 ===")
import heapq
from datetime import datetime, timedelta
class Task:
def __init__(self, name, priority, duration):
self.name = name
self.priority = priority # 数字越小优先级越高
self.duration = duration
self.created_at = datetime.now()
def __lt__(self, other):
return self.priority < other.priority
def __repr__(self):
return f"Task({self.name}, priority={self.priority}, duration={self.duration})"
# 任务队列
task_queue = []
# 添加任务
tasks = [
Task("高优先级任务", 1, 30),
Task("中优先级任务", 2, 60),
Task("低优先级任务", 3, 45),
Task("紧急任务", 0, 15),
]
for task in tasks:
heapq.heappush(task_queue, task)
print("任务执行顺序:")
while task_queue:
task = heapq.heappop(task_queue)
print(f" 执行: {task}")
task_scheduler_example()
2. 数据统计系统
def data_statistics_example():
"""数据统计系统示例"""
print("=== 数据统计系统 ===")
from collections import Counter, defaultdict
import heapq
# 模拟数据
data = [
{'user': '张三', 'action': 'login', 'timestamp': '2018-11-20 09:00:00'},
{'user': '李四', 'action': 'login', 'timestamp': '2018-11-20 09:05:00'},
{'user': '张三', 'action': 'view', 'timestamp': '2018-11-20 09:10:00'},
{'user': '王五', 'action': 'login', 'timestamp': '2018-11-20 09:15:00'},
{'user': '李四', 'action': 'purchase', 'timestamp': '2018-11-20 09:20:00'},
{'user': '张三', 'action': 'purchase', 'timestamp': '2018-11-20 09:25:00'},
]
# 用户行为统计
user_actions = defaultdict(list)
action_counter = Counter()
for record in data:
user_actions[record['user']].append(record['action'])
action_counter[record['action']] += 1
print("用户行为统计:")
for user, actions in user_actions.items():
print(f" {user}: {actions}")
print("\\n行为统计:")
for action, count in action_counter.most_common():
print(f" {action}: {count}次")
# 活跃用户排名
user_activity = {user: len(actions) for user, actions in user_actions.items()}
top_users = heapq.nlargest(2, user_activity.items(), key=lambda x: x[1])
print(f"\\n最活跃用户: {top_users}")
data_statistics_example()
总结
掌握Python高级数据结构是编写高效代码的关键:
- collections模块:理解defaultdict、Counter、deque、namedtuple等高级容器
- heapq模块:掌握堆操作、优先级队列、堆排序等
- bisect模块:了解二分查找、插入排序、范围查找等
- 实际应用:在任务调度、数据统计等场景中的应用
- 性能优化:选择合适的数据结构提高程序效率
- 最佳实践:遵循数据结构使用的最佳实践
通过系统学习这些概念,你将能够编写出更加高效、优雅的Python代码,提高程序的性能和可维护性。
转载请注明:周志洋的博客 » Python实用技巧-数据结构进阶


