29 lines
1.0 KiB
Python
29 lines
1.0 KiB
Python
|
import os
|
||
|
import PIL
|
||
|
from pathlib import Path
|
||
|
from PIL import UnidentifiedImageError, Image
|
||
|
|
||
|
'''
|
||
|
Since Keras uses PIL to open images, you need to identify and remove
faulty images to avoid hiccups in training. When an image is removed from its
parent folder, its corresponding row in the dataframe should also be removed.
|
||
|
But because the dataframe is constructed as such:
|
||
|
|
||
|
'''
|
||
|
def faulty_images(root="training_images"):
    """Delete ``*.jpg`` files under *root* that PIL cannot identify.

    Walks *root* recursively, tries to open every ``*.jpg`` file with
    ``PIL.Image.open`` and deletes each file for which PIL raises
    ``UnidentifiedImageError``, printing one line per deleted file.

    Args:
        root: Directory to scan recursively. Defaults to
            ``"training_images"``, the folder this script has always used.

    Returns:
        A list with the paths of the files that were removed, in the order
        they were visited, so a caller can drop the matching entries from
        any structure that still references them.
    """
    removed = []
    for img_p in Path(root).rglob("*.jpg"):
        try:
            # Image.open keeps the underlying file open on success; the
            # context manager closes it again so handles are not leaked.
            with PIL.Image.open(img_p):
                pass
        except PIL.UnidentifiedImageError:
            os.remove(img_p)
            # img_p is a Path, so it cannot be joined to a str with "+".
            print(f"{img_p} Removed")
            removed.append(img_p)
            # NOTE: remove from folder, dataset (which is constructed from
            # the csv files), dict_pics, temp_pics_source_list,
            # expanded_dropd, expanded_class. But remember that if you run
            # curate.py again the same faulty images will be recreated,
            # since they are still in the raw_data.txt file.
    return removed
|
||
|
|
||
|
# Script entry point: scan for faulty images only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    faulty_images()
|