每次看到喜欢的图就存,导致存了很多重复的图片,一直想把重复的图片删掉。
发布网友
发布时间:2022-04-30 07:50
我来回答
共5个回答
热心网友
时间:2023-10-21 10:32
不知道楼主是否还需要。我也有这个需求,但是没找到感觉合适的软件,于是自己写了一段代码,喜欢的话可以拿去用。
# -*- coding:utf-8 -*-
from PIL import Image
from PIL import ImageFile
import sys
from webUrlGetor.settings import *
ImageFile.LOAD_TRUNCATED_IMAGES = True
class SavePic(object):
    """Find and delete near-duplicate images in a folder via an average hash.

    Each image is scaled to a square, converted to grayscale and turned into
    a string of '0'/'1' characters (one per interior pixel; '1' when the
    pixel is at least as bright as the mean gray level).  Two images count
    as duplicates when the share of matching characters exceeds the
    module-level ``like`` threshold, and the smaller file of such a pair is
    scheduled for deletion.

    ``os``, ``accuracy`` and ``like`` are not defined in this class; they are
    presumably supplied by ``from webUrlGetor.settings import *``
    -- TODO confirm.

    All ``print`` calls take a single pre-formatted argument so the class
    runs unchanged (and prints the same text) on Python 2 and Python 3.
    """

    def getGray(self, image_file):
        """Return every pixel value of ``image_file`` as a flat row-major list.

        ``image_file`` only needs a ``size`` pair ``(width, height)`` and a
        ``getpixel((x, y))`` method.
        """
        width, height = image_file.size[0], image_file.size[1]
        return [image_file.getpixel((x, y))
                for y in range(height)
                for x in range(width)]

    def getAvg(self, ls):
        """Return the mean of the numbers in ``ls`` (the mean gray level).

        The ``/`` operator is used as-is, so under Python 2 integer input
        yields a floored mean.  An empty sequence raises ZeroDivisionError.
        """
        return sum(ls) / len(ls)

    def getImgHash(self, fne, size=480):
        """Return the average-hash bit string of the image stored at ``fne``.

        :param fne: path of the image file.
        :param size: edge length the image is scaled to before hashing.  The
            default keeps the 480 the code has always used.
        :return: string of '0'/'1' with ``(size - 2) ** 2`` characters; the
            one-pixel border is left out.
        """
        # The context manager closes the underlying file handle; resize()
        # returns an independent image, so it stays usable afterwards.
        with Image.open(fne) as source:
            gray = source.resize((size, size)).convert("L")
        width, height = gray.size[0], gray.size[1]
        pixels = self.getGray(gray)
        avg = self.getAvg(pixels)
        # Reuse the pixel list that was already read instead of calling
        # getpixel() a second time, and join once instead of growing a
        # string character by character.
        bits = []
        for y in range(1, height - 1):
            row_start = y * width
            for x in range(1, width - 1):
                bits.append('1' if pixels[row_start + x] >= avg else '0')
        return ''.join(bits)

    def getMH(self, a, b):
        """Return the fraction of positions where ``a`` and ``b`` agree.

        The fraction is relative to ``len(a)`` and is returned as a string
        formatted with the module-level ``accuracy`` format spec.  An empty
        ``a`` is reported as zero similarity instead of dividing by zero.
        If ``b`` is shorter than ``a`` the missing positions count as
        mismatches.
        """
        if not a:
            return format(0.0, accuracy)
        same = sum(1 for left, right in zip(a, b) if left == right)
        return format(float(same) / float(len(a)), accuracy)

    def compare_pic_hash(self, hash_a, hash_b):
        """Return the formatted similarity of two hashes (see ``getMH``).

        :param hash_a: hash of image A
        :param hash_b: hash of image B
        :return: similarity as a formatted string
        """
        return self.getMH(hash_a, hash_b)

    def get_file_name_list_from_path(self, file_path, min_size=102400,
                                     extensions=(".jpg", ".png")):
        """List the image files in ``file_path`` that are worth comparing.

        WARNING: every regular file smaller than ``min_size`` bytes is
        DELETED, whatever its extension.
        NOTE(review): this is the original behaviour and is kept for
        compatibility; confirm that removing small non-image files is
        really intended.

        :param file_path: directory to scan (not recursive).
        :param min_size: files below this many bytes are removed.
        :param extensions: case-sensitive name suffixes treated as images.
        :return: base names of the remaining files with a listed suffix.
        """
        suffixes = tuple(extensions)
        file_name_list = []
        for file_name in os.listdir(file_path):
            full_path = os.path.join(file_path, file_name)
            # Sub-directories can be neither hashed nor os.remove()d.
            if not os.path.isfile(full_path):
                continue
            if os.path.getsize(full_path) < min_size:
                try:
                    os.remove(full_path)
                except OSError as e:
                    print("删除失败,原因: %s" % str(e))
            elif file_name.endswith(suffixes):
                file_name_list.append(file_name)
        return file_name_list

    def get_file_hash_list_from_file_name_list(self, file_path, file_name_list):
        """Hash every file in ``file_name_list``.

        The result always has one entry per input name, so indexes stay
        aligned with ``file_name_list``.  A file that cannot be hashed is
        reported and gets ``None`` as its entry instead of aborting the
        whole run.

        :return: list of hash strings (or ``None`` placeholders).
        """
        total = len(file_name_list)
        file_hash_list = []
        for index, file_name in enumerate(file_name_list):
            try:
                hash_of_file = self.getImgHash(os.path.join(file_path, file_name))
            except Exception as e:  # best effort: any decode error skips the file
                print("获取文件hash错误! %s" % str(e))
                hash_of_file = None
            file_hash_list.append(hash_of_file)
            self.view_bar(index, total)
        return file_hash_list

    def getDocSize(self, path):
        """Return the size of ``path`` in bytes, or ``None`` if it is unreadable."""
        try:
            return os.path.getsize(path)
        except OSError as err:
            print(err)
            return None

    def view_bar(self, num, total):
        """Draw a one-line text progress bar for ``num`` of ``total`` on stdout.

        Nothing is drawn when ``total`` is not positive, and the percentage
        is capped at 100.
        """
        if total <= 0:
            return
        rate = float(num) / total
        # +1 lets the bar reach 100 on the last zero-based step; the cap
        # stops it from showing 101% when num == total.
        rate_num = min(100, int(rate * 100) + 1)
        r = '\r[%s%s]%d%%' % ("#" * rate_num, " " * (100 - rate_num), rate_num,)
        sys.stdout.write(r)
        sys.stdout.flush()

    def compare_pic_hash_list(self, file_hash_list, file_path, file_name_list):
        """Compare all hash pairs and pick the files to delete.

        For each pair whose similarity exceeds ``like`` the smaller file is
        selected (the first one when both sizes are equal).  Pairs involving
        a ``None`` hash or a file whose size cannot be read are skipped.

        :param file_hash_list: hashes, index-aligned with ``file_name_list``
        :param file_path: directory containing the files
        :param file_name_list: base names of the files
        :return: names to delete, without repeats, in discovery order
        """
        if len(file_name_list) != len(file_hash_list):
            sys.exit("hash list 与 file list下标不一致!")
        n = len(file_name_list)
        total = n * (n - 1) // 2
        print("\n共需要对比 %s" % total)
        need_delete_list_file_name = []
        already_listed = set()  # O(1) membership test; the list keeps order
        count = 0
        for i in range(n):
            if file_hash_list[i] is None:
                continue
            for j in range(i + 1, n):
                if file_hash_list[j] is None:
                    continue
                compare = self.compare_pic_hash(file_hash_list[i], file_hash_list[j])
                if float(compare) - float(like) > 0:
                    file_i_size = self.getDocSize(os.path.join(file_path, file_name_list[i]))
                    file_j_size = self.getDocSize(os.path.join(file_path, file_name_list[j]))
                    if file_i_size is None or file_j_size is None:
                        # Cannot tell which one is smaller; keep both.
                        continue
                    # NOTE(review): leaving the inner loop after dropping j
                    # and continuing after dropping i is the original
                    # traversal; the remaining duplicates of i are only
                    # found on a later pass.  Kept unchanged -- confirm
                    # before altering.
                    if file_i_size - file_j_size > 0:
                        loser = file_name_list[j]
                        stop_row = True
                    else:
                        loser = file_name_list[i]
                        stop_row = False
                    if loser not in already_listed:
                        already_listed.add(loser)
                        need_delete_list_file_name.append(loser)
                    if stop_row:
                        break
                    continue
                count = count + 1
                self.view_bar(count, total)
        print("\n去重后的需要删除的长度 %s" % len(need_delete_list_file_name))
        return need_delete_list_file_name

    def delete_file(self, file_path, need_delete_list_file_name):
        """Delete every named file in ``file_path``; report failures and go on."""
        for need_delete_file_name in need_delete_list_file_name:
            try:
                os.remove(os.path.join(file_path, need_delete_file_name))
            except OSError as e:
                print("删除失败,原因: %s" % str(e))

    def main(self, file_path, protected_keyword="uncensored"):
        """Run one de-duplication pass over ``file_path``.

        Steps: list the files, hash them, compare every pair, keep the
        larger file of each similar pair, then delete the rest.

        :param file_path: directory to clean.
        :param protected_keyword: files whose name contains this text are
            never deleted.
        :return: names of the files that were handed to ``delete_file``.
        """
        file_name_list = self.get_file_name_list_from_path(file_path)
        print("文件夹下共包含文件 %d 个" % len(file_name_list))
        file_hash_list = self.get_file_hash_list_from_file_name_list(file_path, file_name_list)
        need_delete_list_file_name = self.compare_pic_hash_list(
            file_hash_list, file_path, file_name_list)
        need_delete_list_file_name_new = [
            item for item in need_delete_list_file_name
            if protected_keyword not in item
        ]
        self.delete_file(file_path, need_delete_list_file_name_new)
        return need_delete_list_file_name_new

    def file_name(self, file_dir):
        """Return the immediate sub-directory names of ``file_dir``.

        Returns an empty list when ``file_dir`` cannot be walked.
        """
        for _root, dirs, _files in os.walk(file_dir):
            return dirs  # only the first (top-level) entry is wanted
        return []
if __name__ == '__main__':
    # Script entry point: repeatedly de-duplicate one hard-coded folder,
    # <parent of this file's directory>/images/kaetzchen.
    x = SavePic()
    now_dir = os.path.dirname(os.path.abspath(__file__))
    project_dir = os.path.split(now_dir)[0]
    img_path = os.path.join(project_dir, "images")
    file_patha = os.path.join(img_path, "kaetzchen")
    # One pass can miss duplicates, so run up to three passes and stop as
    # soon as a pass deletes nothing.
    for j in range(0, 3):
        print("正在进行的去重次数 %d" % j)
        need_delete_list_file_name = x.main(file_patha)
        if not need_delete_list_file_name:
            break
    # Spoken completion notice; `say` is presumably the macOS
    # text-to-speech command -- on other systems the call just fails.
    os.system('say "去重完成!"')
热心网友
时间:2023-10-21 10:33
存储空间吧,不仅能筛选重复的图片,还能筛选重复的文档之类的。我自己在用,觉得还不错,挺能清理内存的。
热心网友
时间:2023-10-21 10:33
买个小米手机吧,自带的管家,直接识别相似照片
热心网友
时间:2023-10-21 10:34
在手机管家上!你打开手机管家点空间清理,看到图片那栏点开就有
热心网友
时间:2023-10-21 10:35
最好把手机用数据线连接到电脑,电脑里删除比较快速。
热心网友
时间:2023-10-21 10:33
不知道楼主是否还需要。我也有这个需求,但是没找到感觉合适的软件,于是自己写了一段代码,喜欢的话可以拿去用。
# -*- coding:utf-8 -*-
from PIL import Image
from PIL import ImageFile
import sys
from webUrlGetor.settings import *
ImageFile.LOAD_TRUNCATED_IMAGES = True
class SavePic(object):
    """Find and delete near-duplicate images in a folder via an average hash.

    Each image is scaled to a square, converted to grayscale and turned into
    a string of '0'/'1' characters (one per interior pixel; '1' when the
    pixel is at least as bright as the mean gray level).  Two images count
    as duplicates when the share of matching characters exceeds the
    module-level ``like`` threshold, and the smaller file of such a pair is
    scheduled for deletion.

    ``os``, ``accuracy`` and ``like`` are not defined in this class; they are
    presumably supplied by ``from webUrlGetor.settings import *``
    -- TODO confirm.

    All ``print`` calls take a single pre-formatted argument so the class
    runs unchanged (and prints the same text) on Python 2 and Python 3.
    """

    def getGray(self, image_file):
        """Return every pixel value of ``image_file`` as a flat row-major list.

        ``image_file`` only needs a ``size`` pair ``(width, height)`` and a
        ``getpixel((x, y))`` method.
        """
        width, height = image_file.size[0], image_file.size[1]
        return [image_file.getpixel((x, y))
                for y in range(height)
                for x in range(width)]

    def getAvg(self, ls):
        """Return the mean of the numbers in ``ls`` (the mean gray level).

        The ``/`` operator is used as-is, so under Python 2 integer input
        yields a floored mean.  An empty sequence raises ZeroDivisionError.
        """
        return sum(ls) / len(ls)

    def getImgHash(self, fne, size=480):
        """Return the average-hash bit string of the image stored at ``fne``.

        :param fne: path of the image file.
        :param size: edge length the image is scaled to before hashing.  The
            default keeps the 480 the code has always used.
        :return: string of '0'/'1' with ``(size - 2) ** 2`` characters; the
            one-pixel border is left out.
        """
        # The context manager closes the underlying file handle; resize()
        # returns an independent image, so it stays usable afterwards.
        with Image.open(fne) as source:
            gray = source.resize((size, size)).convert("L")
        width, height = gray.size[0], gray.size[1]
        pixels = self.getGray(gray)
        avg = self.getAvg(pixels)
        # Reuse the pixel list that was already read instead of calling
        # getpixel() a second time, and join once instead of growing a
        # string character by character.
        bits = []
        for y in range(1, height - 1):
            row_start = y * width
            for x in range(1, width - 1):
                bits.append('1' if pixels[row_start + x] >= avg else '0')
        return ''.join(bits)

    def getMH(self, a, b):
        """Return the fraction of positions where ``a`` and ``b`` agree.

        The fraction is relative to ``len(a)`` and is returned as a string
        formatted with the module-level ``accuracy`` format spec.  An empty
        ``a`` is reported as zero similarity instead of dividing by zero.
        If ``b`` is shorter than ``a`` the missing positions count as
        mismatches.
        """
        if not a:
            return format(0.0, accuracy)
        same = sum(1 for left, right in zip(a, b) if left == right)
        return format(float(same) / float(len(a)), accuracy)

    def compare_pic_hash(self, hash_a, hash_b):
        """Return the formatted similarity of two hashes (see ``getMH``).

        :param hash_a: hash of image A
        :param hash_b: hash of image B
        :return: similarity as a formatted string
        """
        return self.getMH(hash_a, hash_b)

    def get_file_name_list_from_path(self, file_path, min_size=102400,
                                     extensions=(".jpg", ".png")):
        """List the image files in ``file_path`` that are worth comparing.

        WARNING: every regular file smaller than ``min_size`` bytes is
        DELETED, whatever its extension.
        NOTE(review): this is the original behaviour and is kept for
        compatibility; confirm that removing small non-image files is
        really intended.

        :param file_path: directory to scan (not recursive).
        :param min_size: files below this many bytes are removed.
        :param extensions: case-sensitive name suffixes treated as images.
        :return: base names of the remaining files with a listed suffix.
        """
        suffixes = tuple(extensions)
        file_name_list = []
        for file_name in os.listdir(file_path):
            full_path = os.path.join(file_path, file_name)
            # Sub-directories can be neither hashed nor os.remove()d.
            if not os.path.isfile(full_path):
                continue
            if os.path.getsize(full_path) < min_size:
                try:
                    os.remove(full_path)
                except OSError as e:
                    print("删除失败,原因: %s" % str(e))
            elif file_name.endswith(suffixes):
                file_name_list.append(file_name)
        return file_name_list

    def get_file_hash_list_from_file_name_list(self, file_path, file_name_list):
        """Hash every file in ``file_name_list``.

        The result always has one entry per input name, so indexes stay
        aligned with ``file_name_list``.  A file that cannot be hashed is
        reported and gets ``None`` as its entry instead of aborting the
        whole run.

        :return: list of hash strings (or ``None`` placeholders).
        """
        total = len(file_name_list)
        file_hash_list = []
        for index, file_name in enumerate(file_name_list):
            try:
                hash_of_file = self.getImgHash(os.path.join(file_path, file_name))
            except Exception as e:  # best effort: any decode error skips the file
                print("获取文件hash错误! %s" % str(e))
                hash_of_file = None
            file_hash_list.append(hash_of_file)
            self.view_bar(index, total)
        return file_hash_list

    def getDocSize(self, path):
        """Return the size of ``path`` in bytes, or ``None`` if it is unreadable."""
        try:
            return os.path.getsize(path)
        except OSError as err:
            print(err)
            return None

    def view_bar(self, num, total):
        """Draw a one-line text progress bar for ``num`` of ``total`` on stdout.

        Nothing is drawn when ``total`` is not positive, and the percentage
        is capped at 100.
        """
        if total <= 0:
            return
        rate = float(num) / total
        # +1 lets the bar reach 100 on the last zero-based step; the cap
        # stops it from showing 101% when num == total.
        rate_num = min(100, int(rate * 100) + 1)
        r = '\r[%s%s]%d%%' % ("#" * rate_num, " " * (100 - rate_num), rate_num,)
        sys.stdout.write(r)
        sys.stdout.flush()

    def compare_pic_hash_list(self, file_hash_list, file_path, file_name_list):
        """Compare all hash pairs and pick the files to delete.

        For each pair whose similarity exceeds ``like`` the smaller file is
        selected (the first one when both sizes are equal).  Pairs involving
        a ``None`` hash or a file whose size cannot be read are skipped.

        :param file_hash_list: hashes, index-aligned with ``file_name_list``
        :param file_path: directory containing the files
        :param file_name_list: base names of the files
        :return: names to delete, without repeats, in discovery order
        """
        if len(file_name_list) != len(file_hash_list):
            sys.exit("hash list 与 file list下标不一致!")
        n = len(file_name_list)
        total = n * (n - 1) // 2
        print("\n共需要对比 %s" % total)
        need_delete_list_file_name = []
        already_listed = set()  # O(1) membership test; the list keeps order
        count = 0
        for i in range(n):
            if file_hash_list[i] is None:
                continue
            for j in range(i + 1, n):
                if file_hash_list[j] is None:
                    continue
                compare = self.compare_pic_hash(file_hash_list[i], file_hash_list[j])
                if float(compare) - float(like) > 0:
                    file_i_size = self.getDocSize(os.path.join(file_path, file_name_list[i]))
                    file_j_size = self.getDocSize(os.path.join(file_path, file_name_list[j]))
                    if file_i_size is None or file_j_size is None:
                        # Cannot tell which one is smaller; keep both.
                        continue
                    # NOTE(review): leaving the inner loop after dropping j
                    # and continuing after dropping i is the original
                    # traversal; the remaining duplicates of i are only
                    # found on a later pass.  Kept unchanged -- confirm
                    # before altering.
                    if file_i_size - file_j_size > 0:
                        loser = file_name_list[j]
                        stop_row = True
                    else:
                        loser = file_name_list[i]
                        stop_row = False
                    if loser not in already_listed:
                        already_listed.add(loser)
                        need_delete_list_file_name.append(loser)
                    if stop_row:
                        break
                    continue
                count = count + 1
                self.view_bar(count, total)
        print("\n去重后的需要删除的长度 %s" % len(need_delete_list_file_name))
        return need_delete_list_file_name

    def delete_file(self, file_path, need_delete_list_file_name):
        """Delete every named file in ``file_path``; report failures and go on."""
        for need_delete_file_name in need_delete_list_file_name:
            try:
                os.remove(os.path.join(file_path, need_delete_file_name))
            except OSError as e:
                print("删除失败,原因: %s" % str(e))

    def main(self, file_path, protected_keyword="uncensored"):
        """Run one de-duplication pass over ``file_path``.

        Steps: list the files, hash them, compare every pair, keep the
        larger file of each similar pair, then delete the rest.

        :param file_path: directory to clean.
        :param protected_keyword: files whose name contains this text are
            never deleted.
        :return: names of the files that were handed to ``delete_file``.
        """
        file_name_list = self.get_file_name_list_from_path(file_path)
        print("文件夹下共包含文件 %d 个" % len(file_name_list))
        file_hash_list = self.get_file_hash_list_from_file_name_list(file_path, file_name_list)
        need_delete_list_file_name = self.compare_pic_hash_list(
            file_hash_list, file_path, file_name_list)
        need_delete_list_file_name_new = [
            item for item in need_delete_list_file_name
            if protected_keyword not in item
        ]
        self.delete_file(file_path, need_delete_list_file_name_new)
        return need_delete_list_file_name_new

    def file_name(self, file_dir):
        """Return the immediate sub-directory names of ``file_dir``.

        Returns an empty list when ``file_dir`` cannot be walked.
        """
        for _root, dirs, _files in os.walk(file_dir):
            return dirs  # only the first (top-level) entry is wanted
        return []
if __name__ == '__main__':
    # Script entry point: repeatedly de-duplicate one hard-coded folder,
    # <parent of this file's directory>/images/kaetzchen.
    x = SavePic()
    now_dir = os.path.dirname(os.path.abspath(__file__))
    project_dir = os.path.split(now_dir)[0]
    img_path = os.path.join(project_dir, "images")
    file_patha = os.path.join(img_path, "kaetzchen")
    # One pass can miss duplicates, so run up to three passes and stop as
    # soon as a pass deletes nothing.
    for j in range(0, 3):
        print("正在进行的去重次数 %d" % j)
        need_delete_list_file_name = x.main(file_patha)
        if not need_delete_list_file_name:
            break
    # Spoken completion notice; `say` is presumably the macOS
    # text-to-speech command -- on other systems the call just fails.
    os.system('say "去重完成!"')
热心网友
时间:2023-10-21 10:33
存储空间吧,不仅能筛选重复的图片,还能筛选重复的文档之类的。我自己在用,觉得还不错,挺能清理内存的。
热心网友
时间:2023-10-21 10:33
买个小米手机吧,自带的管家,直接识别相似照片
热心网友
时间:2023-10-21 10:34
在手机管家上!你打开手机管家点空间清理,看到图片那栏点开就有
热心网友
时间:2023-10-21 10:35
最好把手机用数据线连接到电脑,电脑里删除比较快速。