import os
import PIL
from pathlib import Path
from PIL import UnidentifiedImageError, Image

'''
Since PIL is used in keras to open images, you need to identify and remove
faulty images to avoid hiccups in training. When these are removed from
their parent folders, their corresponding row in the dataframe should also
be removed. But because the dataframe is constructed as such:
'''


def faulty_images(root="training_images"):
    """Delete every ``*.jpg`` under *root* that PIL cannot identify.

    Each candidate file is opened with ``PIL.Image.open``. If that raises
    ``UnidentifiedImageError`` the file is removed from disk and a message
    is printed.

    Args:
        root: Directory searched recursively for ``*.jpg`` files. Defaults
            to ``"training_images"``, the location the script originally
            hard-coded.

    Returns:
        A list of the ``pathlib.Path`` objects that were removed, in the
        order they were processed.
    """
    removed = []
    # Snapshot the matches first: files are deleted inside the loop, and
    # mutating a directory tree while its lazy rglob() generator is still
    # being consumed is best avoided.
    for img_p in sorted(Path(root).rglob("*.jpg")):
        try:
            # Image.open only identifies the file (lazy load); the context
            # manager closes the handle so valid images do not leak file
            # descriptors over a large dataset.
            with Image.open(img_p):
                pass
        except UnidentifiedImageError:
            os.remove(img_p)
            # img_p is a Path; `img_p + "Removed"` would raise TypeError,
            # so format it into the message instead.
            print(f"{img_p} Removed")
            removed.append(img_p)
    return removed
    # remove from folder, dataset (is constructed from the csv files),
    # dict_pics, temp_pics_source_list, expanded_dropd, expanded_class.
    # But, remember that if you run curate.py again the same faulty images
    # will be recreated since it's still in the raw_data.txt file


if __name__ == "__main__":
    faulty_images()