文件读写与pathlib的重要性
文件读写是Python编程中的基础技能,而pathlib模块提供了更加现代化和优雅的路径操作方式。掌握高效的文件I/O操作和路径管理,能够显著提升代码的可读性和可维护性。本文将从基础的文件读写到高级的文件管理,全面介绍Python文件操作的最佳实践。
基础文件读写
1. 文本文件操作
def text_file_operations_demo():
    """Demonstrate three ways of reading and writing text files.

    The demo runs three steps and prints its progress:

    1. Write, read back and append to a file through ``pathlib.Path``.
    2. Write and read a file with the built-in ``open`` inside ``with``.
    3. Round-trip a short text through several encodings.

    Every file is created relative to the current working directory and
    is left in place when the function returns.

    Returns:
        tuple: ``(text_file, traditional_file, encodings)`` -- the ``Path``
        written in step 1, the ``str`` path written in step 2 and the list
        of encoding names exercised in step 3.
    """
    from datetime import datetime
    from pathlib import Path

    print("=== 文本文件操作 ===")

    # 1. Text file operations through pathlib
    print("1. pathlib文本文件操作:")

    def pathlib_text_operations():
        """Write, read back and append to a text file via ``Path``.

        Returns:
            tuple: ``(file_path, read_content)`` where ``read_content`` is
            the text read back *before* the append step.
        """
        file_path = Path("demo_text_2019.txt")

        # Spelled out line by line so the written bytes do not depend on
        # how this source file happens to be indented.
        text_content = (
            "这是一个Python文件操作演示\n"
            "创建时间: 2019-05-22\n"
            "内容包含中文和英文\n"
            "This is a demo for file operations\n"
        )
        file_path.write_text(text_content, encoding="utf-8")
        print(f" 文件已创建: {file_path}")
        print(f" 文件大小: {file_path.stat().st_size} 字节")

        read_content = file_path.read_text(encoding="utf-8")
        print(f" 读取内容长度: {len(read_content)} 字符")
        print(f" 内容预览: {read_content[:50]}...")

        # Path has no "append_text", so open the path in append mode.
        append_content = "\n追加的内容 - 2019年5月22日"
        with file_path.open("a", encoding="utf-8") as f:
            f.write(append_content)
        print(f" 追加后文件大小: {file_path.stat().st_size} 字节")
        return file_path, read_content

    text_file, _ = pathlib_text_operations()

    # 2. Traditional open()-based file operations
    print("\n2. 传统文件操作:")

    def traditional_file_operations():
        """Write and read a text file with the built-in ``open``.

        Returns:
            tuple: ``(file_path, lines)`` -- the path as a ``str`` and the
            list of lines obtained from ``readlines()``.
        """
        file_path = "traditional_demo_2019.txt"

        with open(file_path, "w", encoding="utf-8") as f:
            f.write("传统方式写入文件\n")
            f.write(f"时间戳: {datetime.now().isoformat()}\n")
            f.write("使用with语句确保文件正确关闭\n")
        print(f" 传统方式文件已创建: {file_path}")

        # Read everything at once ...
        with open(file_path, "r", encoding="utf-8") as f:
            lines = f.readlines()
        print(f" 读取行数: {len(lines)}")
        for i, line in enumerate(lines, 1):
            print(f" 第{i}行: {line.strip()}")

        # ... then again lazily, one line at a time.
        with open(file_path, "r", encoding="utf-8") as f:
            for line_num, line in enumerate(f, 1):
                print(f" 逐行读取第{line_num}行: {line.strip()}")
        return file_path, lines

    traditional_file, _ = traditional_file_operations()

    # 3. Encoding handling
    print("\n3. 文件编码处理:")

    def encoding_handling():
        """Round-trip a short text through several encodings.

        A failure for one encoding is reported and does not stop the
        remaining encodings from being tried.

        Returns:
            list: The encoding names that were exercised.
        """
        encodings = ['utf-8', 'gbk', 'utf-16']
        for encoding in encodings:
            test_file = Path(f"encoding_test_{encoding.replace('-', '_')}_2019.txt")
            test_content = f"编码测试 - {encoding}\n中文测试\nEnglish test"
            try:
                test_file.write_text(test_content, encoding=encoding)
                read_back = test_file.read_text(encoding=encoding)
            except (LookupError, UnicodeError, OSError) as e:
                # LookupError: unknown codec; UnicodeError: text cannot be
                # encoded/decoded; OSError: the file could not be accessed.
                print(f" {encoding} 编码错误: {e}")
            else:
                print(f" {encoding} 编码: 写入{len(test_content)}字符, 读取{len(read_back)}字符")
        return encodings

    encoding_results = encoding_handling()
    return text_file, traditional_file, encoding_results
# Run the text-file demo as soon as this module executes and keep the
# (pathlib file, traditional file path, encodings) tuple it returns.
text_file_results = text_file_operations_demo()
2. 二进制文件操作
def binary_file_operations_demo():
    """Demonstrate binary file I/O, ``struct`` packing and serialization.

    The demo runs four steps and prints its progress:

    1. Write and read raw bytes with ``Path.write_bytes``/``read_bytes``.
    2. Pack six integers with ``struct`` and unpack them from disk.
    3. Serialize a dictionary with ``pickle`` and, for comparison, JSON.
    4. Write and read a file in fixed-size chunks.

    Every file is created relative to the current working directory and
    is left in place when the function returns.

    Returns:
        tuple: ``(binary_file, struct_file, pickle_file, large_file)`` --
        the ``Path`` objects of the files written in steps 1 to 4 (the
        pickle file for step 3).
    """
    import json
    import pickle
    import struct
    from pathlib import Path

    print("\n=== 二进制文件操作 ===")

    # 1. Basic binary read/write
    print("1. 基础二进制读写:")

    def basic_binary_operations():
        """Write six raw bytes to disk and read them back.

        Returns:
            tuple: ``(binary_file, binary_data)``.
        """
        binary_data = b"\x00\x01\x02\x03\x04\x05"

        binary_file = Path("demo_binary_2019.bin")
        binary_file.write_bytes(binary_data)
        print(f" 二进制文件已创建: {binary_file}")
        print(f" 原始数据: {binary_data}")
        print(f" 文件大小: {binary_file.stat().st_size} 字节")

        read_data = binary_file.read_bytes()
        print(f" 读取数据: {read_data}")
        print(f" 数据是否相等: {binary_data == read_data}")
        return binary_file, binary_data

    binary_file, _ = basic_binary_operations()

    # 2. Structured binary data
    print("\n2. 结构化二进制数据:")

    def structured_binary_data():
        """Pack six integers with ``struct`` and round-trip them via a file.

        Returns:
            tuple: ``(struct_file, numbers)``.
        """
        # "<" pins little-endian byte order and standard sizes, so the file
        # layout does not depend on the machine that wrote it (a bare "6i"
        # would use native order, size and alignment).
        record = struct.Struct("<6i")
        numbers = [2019, 5, 22, 15, 30, 45]  # year, month, day, hour, minute, second
        packed_data = record.pack(*numbers)

        struct_file = Path("structured_data_2019.bin")
        struct_file.write_bytes(packed_data)
        print(f" 打包的数据: {numbers}")
        print(f" 二进制数据: {packed_data}")
        print(f" 数据大小: {len(packed_data)} 字节")

        unpacked_data = list(record.unpack(struct_file.read_bytes()))
        print(f" 解包的数据: {unpacked_data}")
        print(f" 数据是否相等: {numbers == unpacked_data}")
        return struct_file, numbers

    struct_file, _ = structured_binary_data()

    # 3. Object serialization
    print("\n3. 对象序列化:")

    def object_serialization():
        """Serialize a sample dictionary with pickle and with JSON.

        Returns:
            tuple: ``(pickle_file, json_file, test_data)``.
        """
        test_data = {
            'name': 'Python文件操作',
            'date': '2019-05-22',
            'version': '3.7',
            'features': ['pathlib', 'with语句', '二进制操作'],
            'metadata': {'created_by': 'demo', 'file_type': 'binary'},
        }

        pickle_file = Path("pickle_data_2019.pkl")
        with pickle_file.open("wb") as f:
            pickle.dump(test_data, f)
        print(f" Pickle文件已创建: {pickle_file}")
        print(f" Pickle文件大小: {pickle_file.stat().st_size} 字节")

        # NOTE: unpickling can execute arbitrary code. It is acceptable here
        # only because the file was written a few lines above; never call
        # pickle.load on data from an untrusted source.
        with pickle_file.open("rb") as f:
            loaded_data = pickle.load(f)
        print(f" 原始数据: {test_data}")
        print(f" 加载数据: {loaded_data}")
        print(f" 数据是否相等: {test_data == loaded_data}")

        # JSON serialization of the same data, for size comparison.
        json_file = Path("json_data_2019.json")
        with json_file.open("w", encoding="utf-8") as f:
            json.dump(test_data, f, ensure_ascii=False, indent=2)
        print(f" JSON文件大小: {json_file.stat().st_size} 字节")
        return pickle_file, json_file, test_data

    pickle_file, _, _ = object_serialization()

    # 4. Large file handling
    print("\n4. 大文件处理:")

    def large_file_handling():
        """Write and read a (simulated) large file in fixed-size chunks.

        Returns:
            tuple: ``(large_file, read_chunks)`` where ``read_chunks`` is
            the list of byte chunks read back from the file.
        """
        large_file = Path("large_file_2019.bin")

        chunk_size = 1024  # 1 KB
        total_size = 10 * 1024  # 10 KB
        with large_file.open("wb") as f:
            for offset in range(0, total_size, chunk_size):
                # The last chunk may be shorter when total_size is not a
                # multiple of chunk_size.
                f.write(b"A" * min(chunk_size, total_size - offset))
        print(f" 大文件已创建: {large_file}")
        print(f" 文件大小: {large_file.stat().st_size} 字节")

        # iter() with a sentinel stops at the first empty read (EOF).
        with large_file.open("rb") as f:
            read_chunks = list(iter(lambda: f.read(chunk_size), b""))
        print(f" 读取块数: {len(read_chunks)}")
        print(f" 每块大小: {[len(chunk) for chunk in read_chunks[:3]]}...")
        return large_file, read_chunks

    large_file, _ = large_file_handling()
    return binary_file, struct_file, pickle_file, large_file
# Run the binary-file demo as soon as this module executes and keep the
# tuple of the four file paths it returns.
binary_file_results = binary_file_operations_demo()
pathlib路径操作
1. 路径创建和操作
def pathlib_operations_demo():
    """Demonstrate path handling with ``pathlib``.

    The demo runs four steps and prints its progress:

    1. Build paths and inspect their components.
    2. Create directories and files, and list the current directory.
    3. Search the current directory by pattern and modification time.
    4. Take paths apart, join them and compute relative paths.

    Step 2 creates ``test_directory_2019`` (with nested sub-directories and
    three text files) under the current working directory and leaves it in
    place. Step 3 walks the whole tree below the current directory.

    Returns:
        tuple: ``(demo_dir, test_dir, py_files, complex_path)`` -- the
        example directory path from step 1 (never created), the directory
        created in step 2, the list of ``*.py`` files found in step 3 and
        the example absolute path from step 4.
    """
    import tempfile
    from datetime import datetime, timedelta
    from pathlib import Path

    print("\n=== pathlib路径操作 ===")

    # 1. Path creation and basic information
    print("1. 路径创建和基本信息:")

    def path_creation_and_info():
        """Build a few paths and print their components.

        Nothing is created on disk here.

        Returns:
            tuple: ``(demo_dir, demo_file)``.
        """
        current_dir = Path(".")
        home_dir = Path.home()
        # Ask the platform for its temp directory instead of assuming /tmp,
        # which does not exist on every operating system.
        temp_dir = Path(tempfile.gettempdir())
        print(f" 当前目录: {current_dir.absolute()}")
        print(f" 家目录: {home_dir}")
        print(f" 临时目录: {temp_dir}")

        # The "/" operator joins path components.
        demo_dir = current_dir / "demo_2019"
        demo_file = demo_dir / "test.txt"
        print(f" 演示目录: {demo_dir}")
        print(f" 演示文件: {demo_file}")

        print(f" 文件父目录: {demo_file.parent}")
        print(f" 文件名: {demo_file.name}")
        print(f" 文件后缀: {demo_file.suffix}")
        print(f" 文件词干: {demo_file.stem}")

        print(f" 当前目录存在: {current_dir.exists()}")
        print(f" 演示目录存在: {demo_dir.exists()}")
        print(f" 演示文件存在: {demo_file.exists()}")
        return demo_dir, demo_file

    demo_dir, _ = path_creation_and_info()

    # 2. Directory operations
    print("\n2. 目录操作:")

    def directory_operations():
        """Create directories and files and list the current directory.

        Returns:
            tuple: ``(test_dir, nested_dir)``.
        """
        test_dir = Path("test_directory_2019")
        test_dir.mkdir(exist_ok=True)
        print(f" 目录已创建: {test_dir}")
        print(f" 目录存在: {test_dir.exists()}")
        print(f" 是否为目录: {test_dir.is_dir()}")

        # parents=True creates every missing intermediate directory.
        nested_dir = test_dir / "nested" / "deep" / "structure"
        nested_dir.mkdir(parents=True, exist_ok=True)
        print(f" 嵌套目录已创建: {nested_dir}")

        print(" 当前目录内容:")
        for item in Path(".").iterdir():
            item_type = "目录" if item.is_dir() else "文件"
            print(f" {item.name} ({item_type})")

        for i in range(3):
            test_file = test_dir / f"test_file_{i}.txt"
            # Explicit UTF-8: the text is Chinese and the platform default
            # encoding may be unable to represent it.
            test_file.write_text(
                f"测试文件 {i} - 创建于 {datetime.now()}", encoding="utf-8"
            )
        print(f" 在 {test_dir} 中创建了3个测试文件")
        return test_dir, nested_dir

    test_dir, _ = directory_operations()

    # 3. File search and filtering
    print("\n3. 文件搜索和过滤:")

    def file_search_and_filter():
        """Search the current directory by pattern and by modification time.

        Returns:
            tuple: ``(py_files, txt_files, recent_files)``.
        """
        # rglob searches recursively; glob only the directory itself.
        py_files = list(Path(".").rglob("*.py"))
        print(f" 找到Python文件: {len(py_files)} 个")
        for py_file in py_files[:3]:  # show the first three only
            print(f" {py_file}")

        txt_files = list(Path(".").glob("*.txt"))
        print(f" 找到文本文件: {len(txt_files)} 个")

        directories = [p for p in Path(".").iterdir() if p.is_dir()]
        print(f" 找到目录: {len(directories)} 个")
        for directory in directories:
            print(f" {directory}")

        # Keep the regular files modified within the last hour.
        cutoff_time = datetime.now() - timedelta(hours=1)
        recent_files = [
            file_path
            for file_path in Path(".").rglob("*")
            if file_path.is_file()
            and datetime.fromtimestamp(file_path.stat().st_mtime) > cutoff_time
        ]
        print(f" 最近1小时修改的文件: {len(recent_files)} 个")
        return py_files, txt_files, recent_files

    py_files, _, _ = file_search_and_filter()

    # 4. Path manipulation and conversion
    print("\n4. 路径操作和转换:")

    def path_manipulation():
        """Take paths apart, join them and compute a relative path.

        Only example paths are used; nothing is created on disk.

        Returns:
            tuple: ``(complex_path, full_path)``.
        """
        complex_path = Path("/home/user/documents/project_2019/src/main.py")
        print(f" 复杂路径: {complex_path}")
        print(f" 绝对路径: {complex_path.absolute()}")
        print(f" 规范路径: {complex_path.resolve()}")
        print(f" 路径部分: {complex_path.parts}")

        # Joining several components one after another.
        full_path = Path("/base/path")
        for part in ["subdir1", "subdir2", "file.txt"]:
            full_path = full_path / part
        print(f" 拼接路径: {full_path}")

        # relative_to() only works when the other path is an ancestor; here
        # the two files live in sibling directories, so it raises ValueError.
        path1 = Path("/home/user/documents/file1.txt")
        path2 = Path("/home/user/pictures/file2.jpg")
        try:
            relative_path = path2.relative_to(path1.parent)
        except ValueError:
            print(" 无法计算相对路径")
        else:
            print(f" 相对路径: {relative_path}")

        invalid_chars = ['<', '>', ':', '"', '|', '?', '*']
        print(" 路径验证:")
        for char in invalid_chars:
            test_path = Path(f"test{char}path")
            has_invalid_char = any(c in str(test_path) for c in invalid_chars)
            # NOTE(review): an absolute path is reported as valid whatever
            # characters it contains -- presumably to allow the ":" of a
            # drive prefix; confirm this is intended.
            is_valid = test_path.is_absolute() or not has_invalid_char
            print(f" 包含'{char}'的路径: {test_path} (有效: {is_valid})")
        return complex_path, full_path

    complex_path, _ = path_manipulation()
    return demo_dir, test_dir, py_files, complex_path
# Run the pathlib demo as soon as this module executes and keep the
# (demo_dir, test_dir, py_files, complex_path) tuple it returns.
pathlib_results = pathlib_operations_demo()
2. 高级文件管理
def advanced_file_management_demo():
    """Demonstrate copying, inspecting, hashing and bulk-handling files.

    The demo runs five steps and prints its progress:

    1. Create a file, copy it and move the copy into a sub-directory.
    2. Print size, timestamps, permissions and type of the source file.
    3. Hash the source file and compare it with the moved copy.
    4. Work with a temporary file and a temporary directory.
    5. Create, rename and delete a batch of files.

    ``source_file_2019.txt`` and ``move_directory/`` are created under the
    current working directory and left in place; the temporary file and the
    batch directory are removed again.

    Returns:
        tuple: ``(source_file, move_file, original_hash, batch_dir)`` --
        the source ``Path``, the ``Path`` of the moved copy, the MD5 hex
        digest of the source file and the (already removed) batch
        directory ``Path``.
    """
    import hashlib
    import shutil
    import stat
    import tempfile
    from datetime import datetime
    from pathlib import Path

    print("\n=== 高级文件管理 ===")

    # 1. Copying and moving files
    print("1. 文件复制和移动:")

    def file_copy_and_move():
        """Create a file, copy it and move the copy into a directory.

        Returns:
            tuple: ``(source_file, move_file)``.
        """
        source_file = Path("source_file_2019.txt")
        # Explicit UTF-8: the text is Chinese and the platform default
        # encoding may be unable to represent it.
        source_file.write_text(
            "这是源文件内容\n创建时间: 2019-05-22", encoding="utf-8"
        )
        print(f" 源文件已创建: {source_file}")

        # copy2 copies the content and also tries to preserve metadata.
        copy_file = Path("copy_file_2019.txt")
        shutil.copy2(source_file, copy_file)
        print(f" 文件已复制: {copy_file}")
        print(f" 复制文件存在: {copy_file.exists()}")

        move_dir = Path("move_directory")
        move_dir.mkdir(exist_ok=True)
        move_file = move_dir / "moved_file_2019.txt"
        # str() keeps this working on Python versions whose shutil.move
        # does not accept path objects.
        shutil.move(str(copy_file), str(move_file))
        print(f" 文件已移动: {move_file}")
        print(f" 原文件存在: {copy_file.exists()}")
        print(f" 移动后文件存在: {move_file.exists()}")
        return source_file, move_file

    source_file, move_file = file_copy_and_move()

    # 2. File information
    print("\n2. 文件信息获取:")

    def file_information():
        """Print size, timestamps, permissions and type of ``source_file``.

        Returns:
            os.stat_result: The stat result of ``source_file``.
        """
        stat_info = source_file.stat()
        print(" 文件信息:")
        print(f" 文件大小: {stat_info.st_size} 字节")
        # NOTE: st_ctime is the creation time on Windows, but the time of
        # the last metadata change on Unix.
        print(f" 创建时间: {datetime.fromtimestamp(stat_info.st_ctime)}")
        print(f" 修改时间: {datetime.fromtimestamp(stat_info.st_mtime)}")
        print(f" 访问时间: {datetime.fromtimestamp(stat_info.st_atime)}")
        print(f" 文件模式: {oct(stat_info.st_mode)}")

        # Owner permission bits, reported as booleans rather than as the
        # raw mask values (256 / 128 / 64).
        mode = stat_info.st_mode
        print(" 文件权限:")
        print(f" 可读: {bool(mode & stat.S_IRUSR)}")
        print(f" 可写: {bool(mode & stat.S_IWUSR)}")
        print(f" 可执行: {bool(mode & stat.S_IXUSR)}")

        print(" 文件类型:")
        print(f" 是否为文件: {source_file.is_file()}")
        print(f" 是否为目录: {source_file.is_dir()}")
        print(f" 是否为符号链接: {source_file.is_symlink()}")
        print(f" 是否为绝对路径: {source_file.is_absolute()}")
        return stat_info

    file_information()

    # 3. File hashing and verification
    print("\n3. 文件哈希和校验:")

    def file_hashing():
        """Hash ``source_file`` and compare it with the moved copy.

        Returns:
            tuple: ``(original_hash, moved_hash)`` -- MD5 hex digests of
            the source file and of the moved copy.
        """

        def calculate_file_hash(file_path, algorithm='md5'):
            """Return the hex digest of *file_path*, read in 4 KiB chunks."""
            hash_obj = hashlib.new(algorithm)
            with open(file_path, 'rb') as f:
                # iter() with a sentinel stops at the first empty read.
                for chunk in iter(lambda: f.read(4096), b""):
                    hash_obj.update(chunk)
            return hash_obj.hexdigest()

        print(" 文件哈希值:")
        for algo in ['md5', 'sha1', 'sha256']:
            hash_value = calculate_file_hash(source_file, algo)
            print(f" {algo.upper()}: {hash_value}")

        # MD5 is used here only to detect accidental differences between
        # the two files, not as a security measure.
        original_hash = calculate_file_hash(source_file, 'md5')
        moved_hash = calculate_file_hash(move_file, 'md5')
        print(" 文件完整性校验:")
        print(f" 源文件MD5: {original_hash}")
        print(f" 移动文件MD5: {moved_hash}")
        print(f" 文件完整性: {'通过' if original_hash == moved_hash else '失败'}")
        return original_hash, moved_hash

    original_hash, _ = file_hashing()

    # 4. Temporary files
    print("\n4. 临时文件处理:")

    def temporary_file_handling():
        """Create and clean up a temporary file and a temporary directory.

        Returns:
            pathlib.Path: Path of the temporary file (already deleted).
        """
        # delete=False keeps the file after the handle is closed so it can
        # be reopened by name; it must then be removed explicitly.
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.txt', delete=False, encoding="utf-8"
        ) as temp_file:
            temp_file.write("临时文件内容\n创建时间: 2019-05-22")
            temp_path = Path(temp_file.name)

        try:
            print(f" 临时文件已创建: {temp_path}")
            print(f" 临时文件内容: {temp_path.read_text(encoding='utf-8')}")

            # The directory and its content vanish when the block exits.
            with tempfile.TemporaryDirectory() as temp_dir:
                temp_dir_path = Path(temp_dir)
                temp_file_in_dir = temp_dir_path / "temp_file.txt"
                temp_file_in_dir.write_text("临时目录中的文件", encoding="utf-8")
                print(f" 临时目录: {temp_dir_path}")
                print(f" 临时目录中的文件: {temp_file_in_dir}")
                print(f" 文件内容: {temp_file_in_dir.read_text(encoding='utf-8')}")
        finally:
            # Remove the file even if one of the steps above failed.
            temp_path.unlink()
        print(" 临时文件已删除")
        return temp_path

    temporary_file_handling()

    # 5. Batch file operations
    print("\n5. 批量文件操作:")

    def batch_file_operations():
        """Create, rename and delete a batch of files, then the directory.

        Returns:
            pathlib.Path: Path of the batch directory (already removed).
        """
        batch_dir = Path("batch_operations_2019")
        batch_dir.mkdir(exist_ok=True)

        for i in range(5):
            test_file = batch_dir / f"test_{i:02d}.txt"
            test_file.write_text(f"测试文件 {i}\n批次操作 - 2019", encoding="utf-8")
        print(f" 批量目录已创建: {batch_dir}")

        # Snapshot the matches first so the directory is not modified while
        # it is still being scanned.
        to_rename = sorted(batch_dir.glob("test_*.txt"))
        for file_path in to_rename:
            new_name = file_path.name.replace("test_", "renamed_")
            file_path.rename(file_path.with_name(new_name))
        print(f" 批量重命名完成: {len(to_rename)} 个文件")

        to_delete = sorted(batch_dir.glob("renamed_*.txt"))
        for file_path in to_delete:
            file_path.unlink()
        print(f" 批量删除完成: {len(to_delete)} 个文件")

        # rmdir only succeeds on an empty directory.
        batch_dir.rmdir()
        print(" 目录已清理")
        return batch_dir

    batch_dir = batch_file_operations()
    return source_file, move_file, original_hash, batch_dir
# Run the advanced file-management demo as soon as this module executes and
# keep the (source_file, move_file, original_hash, batch_dir) tuple it returns.
advanced_file_results = advanced_file_management_demo()
总结
文件读写与pathlib的关键要点:
- 文本文件操作:pathlib的read_text/write_text、传统with语句、编码处理
- 二进制文件操作:read_bytes/write_bytes、结构化数据、对象序列化
- pathlib路径操作:路径创建、组合、属性获取、存在性检查
- 目录管理:创建、遍历、搜索、过滤文件
- 高级文件管理:复制移动、信息获取、哈希校验、临时文件
- 批量操作:批量重命名、删除、处理
- 最佳实践:使用with语句、路径验证、错误处理、性能优化
掌握这些文件操作技能,可以高效处理各种文件I/O任务,构建健壮的文件管理系统,为Python项目提供可靠的文件处理支持。
转载请注明:周志洋的博客 » Python实用技巧-文件读写与pathlib详解


