(没啥大用)甄别图片中有问题的信息,并重命名

发布时间: 2023-12-23 22:11:18 作者: 不上火星不改名
import os
import shutil
import re
from nltk.corpus import words

# Build the English dictionary once at import time.  Entries are lower-cased
# because is_valid_word() lower-cases every candidate before the lookup; the
# NLTK corpus keeps some entries capitalised, and those could otherwise never
# match.
word_list = {entry.lower() for entry in words.words()}


def is_valid_word(word):
    """Tell whether ``word`` is present in the module-level ``word_list``.

    The lookup is performed on the lower-cased form of ``word``.
    """
    lowered = word.lower()
    return lowered in word_list


def get_new_name(base, ext, root):
    """Return a file name built from ``base`` and ``ext`` that is free in ``root``.

    ``base + ext`` is returned when nothing of that name exists in ``root``.
    Otherwise ``base(1)ext``, ``base(2)ext``, ... are tried in order and the
    first name that does not exist yet is returned.

    Args:
        base: File name without extension.
        ext: Extension, including the leading dot (may be empty).
        root: Directory in which the name must not collide.
    """
    candidate = f"{base}{ext}"
    attempt = 1
    # Keep numbering until the candidate no longer clashes with an entry.
    while os.path.exists(os.path.join(root, candidate)):
        candidate = f"{base}({attempt}){ext}"
        attempt += 1
    return candidate


def process_image_name(name, x, root):
    """Return the file name an image should carry after checking field ``x``.

    ``name`` is split on commas and field ``x`` (0-based) is inspected.  The
    field is cut into tokens, with whitespace, punctuation and digits acting
    as separators, and every token is looked up with ``is_valid_word``.

    Args:
        name: File name without a directory part, e.g. ``"cat,zzq.png"``.
        x: Index of the comma-separated field to check.
        root: Directory holding the file; used to avoid name collisions.

    Returns:
        ``name`` unchanged when it has no field ``x`` or when every token of
        that field is a dictionary word.  Otherwise a new name made of the
        fields before ``x`` plus the file's extension, made unique inside
        ``root`` by ``get_new_name``.
    """
    parts = name.split(',')
    last_index = len(parts) - 1
    if last_index < x:
        return name  # No such field: leave the file name untouched.

    if x in (last_index, -1):
        # Only the final field carries the real file extension.
        field, ext = os.path.splitext(parts[x])
    else:
        # Take the extension from the end of the name so that truncating at a
        # middle field does not produce an extension-less file.
        field = parts[x]
        ext = os.path.splitext(parts[last_index])[1]

    # Judge each token on its own.  Comparing the number of valid tokens with
    # the number of whitespace-separated words misfires as soon as punctuation
    # splits a word ("blue-eyes") or hides an invalid token ("zz-cat dog").
    tokens = [token for token in re.split(r'\s+|\W+|\d+', field) if token]
    if tokens:
        field_is_valid = all(is_valid_word(token) for token in tokens)
    else:
        # Nothing to look up: a blank field passes, while a field made only
        # of digits or punctuation is still flagged.
        field_is_valid = not field.strip()
    if field_is_valid:
        return name

    # Drop the offending field and everything after it; keep the extension.
    new_base = ','.join(parts[:x])
    return get_new_name(new_base, ext, root)


def copy_structure(src, dest):
    """Recreate the directory tree of ``src`` under ``dest`` without any files.

    Every directory found by walking ``src`` (``src`` itself maps to ``dest``)
    is created at the same relative path below ``dest``.  Directories that
    already exist are left alone, so the call can be repeated safely.

    Args:
        src: Root of the tree whose layout is mirrored.
        dest: Root under which the empty directories are created.
    """
    for dirpath, _dirnames, _filenames in os.walk(src):
        relative = os.path.relpath(dirpath, src)
        # exist_ok replaces the isdir()-then-makedirs() pair, which could fail
        # if the directory appeared between the check and the creation.
        os.makedirs(os.path.join(dest, relative), exist_ok=True)


def main(parent_dir_a, target_dir_b, x):
    """Rename images whose comma field ``x`` fails the word check and move them.

    The directory layout of ``parent_dir_a`` is first mirrored into a
    directory named ``target_dir_b`` plus the suffix ``"-副本"``.  Every image
    below ``parent_dir_a`` is then passed to ``process_image_name``; files
    that get a new name are renamed in place and, once the whole tree has
    been scanned, moved to the matching sub-directory of the mirror.

    Args:
        parent_dir_a: Root directory holding the original images.
        target_dir_b: Path the mirror directory name is derived from.
        x: Index of the comma-separated file-name field to check.
    """
    # Normalise first so that a trailing separator ("out/") yields "out-副本"
    # rather than a child directory literally named "-副本".
    mirror_base = os.path.normpath(target_dir_b) if target_dir_b else target_dir_b
    parent_dir_c = mirror_base + "-副本"
    copy_structure(parent_dir_a, parent_dir_c)  # mirror the folder layout

    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')
    renamed_images = []  # (current path, destination path) of renamed files
    for root, _dirs, files in os.walk(parent_dir_a):
        for file in files:
            if not file.lower().endswith(image_suffixes):
                continue
            new_name = process_image_name(file, x, root)
            if new_name == file:
                continue
            os.rename(os.path.join(root, file), os.path.join(root, new_name))
            renamed_images.append((
                os.path.join(root, new_name),
                os.path.join(parent_dir_c, os.path.relpath(root, parent_dir_a), new_name),
            ))

    # Moves are deferred until the scan is finished so that earlier renames
    # stay visible in ``root`` while later collision-free names are chosen.
    for src, dest in renamed_images:
        dest_dir, dest_name = os.path.split(dest)
        stem, ext = os.path.splitext(dest_name)
        # Pick a free name in the destination as well: shutil.move() may
        # silently replace a same-named file left there by a previous run.
        shutil.move(src, os.path.join(dest_dir, get_new_name(stem, ext, dest_dir)))


if __name__ == "__main__":
    # Interactive entry point: ask for the source tree, the destination base
    # path and the comma-field index (in that order), then run the tool.
    parent_dir_a, target_dir_b = (
        input("请输入父目录a(原始数据地址):"),
        input("请输入指定目录b(复制文件夹结构的目标位置):"),
    )
    x = int(input("请输入自然数X(处理第几个逗号后的内容):"))
    main(parent_dir_a=parent_dir_a, target_dir_b=target_dir_b, x=x)