ebay-ml-lister/image_faults.py

29 lines
1.0 KiB
Python
Raw Normal View History

import os
import PIL
from pathlib import Path
from PIL import UnidentifiedImageError, Image
'''
Since PIL is used in keras to open images, you need to identify and remove
faulty images to avoid hiccups in training. When these are removed from their
parent folders, their corresponding row in the dataframe should also be removed.
Note that this script only deletes the image files; it does not update the
dataframe, so that clean-up has to be done separately.
'''
def faulty_images(root="training_images"):
    """Delete every ``*.jpg`` under *root* that PIL cannot identify.

    The directory tree is searched recursively. Each match is opened with
    PIL; files that raise ``UnidentifiedImageError`` are removed from disk
    and their path is printed.

    Args:
        root: Directory to scan. Defaults to ``"training_images"``, relative
            to the current working directory.

    Returns:
        None. Removed paths are reported on stdout only.
    """
    # Snapshot the matches first so files are not deleted from underneath
    # the directory walk while it is still running.
    candidates = list(Path(root).rglob("*.jpg"))
    for img_p in candidates:
        try:
            # The context manager closes the file handle of every image that
            # opens successfully, instead of leaking it until garbage
            # collection.
            with Image.open(img_p):
                pass
        except UnidentifiedImageError:
            img_p.unlink()
            # img_p is a Path, so it must be formatted rather than
            # concatenated with a str (Path + str raises TypeError).
            print(f"{img_p} Removed")
            # NOTE: deleting the file is only the first step. The image should
            # also be removed from the dataset (which is constructed from the
            # csv files), dict_pics, temp_pics_source_list, expanded_dropd and
            # expanded_class. But remember that if you run curate.py again,
            # the same faulty images will be recreated, since they are still
            # in the raw_data.txt file.
if __name__ == "__main__":
    # Run the clean-up only when this file is executed as a script,
    # not when it is imported as a module.
    faulty_images()