# 可能出现的错误

使用 Pillow 加载损坏或不完整的图片文件时,可能出现以下错误:

```text
OSError: image file is truncated (7 bytes not processed)
OSError: broken data stream when reading image file
OSError: image file is truncated (1 bytes not processed)
```

# 解决方案 - 预检测脚本

# 预检测脚本(DeepSeek 生成)

import os
from PIL import Image
from tqdm import tqdm  # 用于显示进度条,可选
def check_image(file_path):
    """Report whether the image at ``file_path`` can be opened and fully loaded.

    The file is opened with ``Image.open`` and its pixel data is loaded in
    full. Any exception raised along the way is printed and swallowed.

    Args:
        file_path: Path of the image file to check.

    Returns:
        True if the image was opened and loaded without raising,
        False otherwise.
    """
    try:
        with Image.open(file_path) as image:
            # Force all pixel data to be loaded.
            image.load()
            # Optional extra integrity check on the loaded data.
            image.tobytes()
    except OSError as err:
        # Truncated files get their own label; every other OSError is
        # reported as an invalid file.
        label = "损坏文件" if "truncated" in str(err) else "无效文件"
        print(f"{label}: {file_path} - 错误信息: {str(err)}")
        return False
    except Exception as err:
        print(f"未知错误: {file_path} - 错误类型: {type(err).__name__}, 信息: {str(err)}")
        return False
    else:
        return True
def scan_images_directory(directory):
    """Recursively scan ``directory`` for image files that fail ``check_image``.

    Every file under ``directory`` whose name ends (case-insensitively) with
    one of the recognised image extensions is passed to ``check_image``. A
    summary report is printed once the scan finishes.

    Args:
        directory: Root directory to walk.

    Returns:
        A list with the paths of the files for which ``check_image``
        returned a falsy value, in the order they were scanned.
    """
    # Common image formats, as a tuple so str.endswith can test them all at once.
    image_extensions = ('.jpg', '.jpeg', '.png', '.webp', '.bmp', '.tiff')

    # Gather the candidates first so that a single progress bar covers the
    # whole tree and counts only image files, instead of opening one bar per
    # sub-directory that also counts non-image files.
    image_paths = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.lower().endswith(image_extensions):
                image_paths.append(os.path.join(root, file))

    corrupted_files = []
    for file_path in tqdm(image_paths, desc="正在扫描文件"):
        if not check_image(file_path):
            corrupted_files.append(file_path)

    # Summary report.
    print("\n扫描完成!")
    if corrupted_files:
        print(f"发现 {len(corrupted_files)} 个损坏文件:")
        for path in corrupted_files:
            print(f" - {path}")
    else:
        print("未发现损坏文件")

    # Returned so callers can act on the result programmatically.
    return corrupted_files
if __name__ == "__main__":
    import argparse

    # Command-line entry point: one positional argument naming the
    # directory to scan.
    cli = argparse.ArgumentParser(description="检测损坏的图片文件")
    cli.add_argument("directory", type=str, help="要扫描的目录路径")
    scan_images_directory(cli.parse_args().directory)

# 安装依赖

```bash
pip install Pillow tqdm
```

# 运行脚本

```bash
python check_images.py <你的图片目录路径>
```