added image_faults.py to remove faulty images, fixed non-expand PictureURL dfs
This commit is contained in:
parent
e766a1ba9d
commit
2f3df22c4a
@ -11,7 +11,7 @@ training = curate.to_training(raw_data) # creates raw_df
|
||||
class_training = curate.class_training(training) # creates initial class_training df
|
||||
nvl_training = curate.nvl_training(training) # creates initial nvl_training
|
||||
dropd = curate.drop_nvl_cols(nvl_training) # label mask
|
||||
|
||||
dropd
|
||||
expanded_dfs = curate.expand_nvlclass(class_training, dropd) # pulls values out of lists for both dfs
|
||||
|
||||
expanded_class = expanded_dfs[0] # TODO still having problems with Unnamed: 0 col
|
||||
|
11
ebay_api.py
11
ebay_api.py
@ -58,7 +58,6 @@ class ShoppingApi:
|
||||
try:
|
||||
response = requests.get(url, headers=headers, timeout=4)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
|
||||
except requests.exceptions.RequestException:
|
||||
print('connection error')
|
||||
@ -69,7 +68,7 @@ class ShoppingApi:
|
||||
temp_cat_list = [cat['CategoryID'] for cat in response]
|
||||
cat_list.extend(temp_cat_list)
|
||||
|
||||
with open('cat_list1.txt', 'w') as f:
|
||||
with open('cat_list.txt', 'w') as f:
|
||||
json.dump(cat_list, f)
|
||||
|
||||
# leaf_list = [node['LeafCategory'] for node in response]
|
||||
@ -281,9 +280,11 @@ class CurateData:
|
||||
# compute power reqs. So, figure out a way to make a true temp list based on the current call executed
|
||||
|
||||
else:
|
||||
class_training['PictureURL'] = class_training['PictureURL'].apply(lambda x: x[0])
|
||||
expanded_class = class_training
|
||||
dropd['PictureURL'] = dropd['PictureURL'].apply(lambda x: x[0])
|
||||
class_training['PictureURL'] = class_training['PictureURL'].apply(lambda x: x[0] if len(x)>0 else np.nan)
|
||||
expanded_class = class_training.dropna()
|
||||
dropd = dropd.dropna(subset=['PictureURL'])
|
||||
dropd['PictureURL'] = dropd['PictureURL'].apply(lambda x: x[0] if len(x)>0 else np.nan)
|
||||
dropd = dropd.dropna(subset=['PictureURL'])
|
||||
expanded_dropd = dropd
|
||||
|
||||
expanded_dropd = self.extract_df(expanded_dropd) # convert lists to values
|
||||
|
28
image_faults.py
Normal file
28
image_faults.py
Normal file
@ -0,0 +1,28 @@
|
||||
import os
|
||||
import PIL
|
||||
from pathlib import Path
|
||||
from PIL import UnidentifiedImageError, Image
|
||||
|
||||
'''
|
||||
Since PIL is used in keras to open images, you need to identify and remove
|
||||
faulty images to avoid hiccups in training. When these are removed from their
|
||||
parent folders, their corresponding row in the dataframe should also be removed.
|
||||
But because the dataframe is constructed as such:
|
||||
|
||||
'''
|
||||
def faulty_images():
    """Delete every unreadable ``.jpg`` file found under ``training_images``.

    Each file matched by a recursive ``*.jpg`` search is opened with PIL. A
    file that PIL cannot identify as an image is removed from disk and its
    path is printed. Nothing is returned.
    """
    # Snapshot the matches first so that deleting files cannot disturb a
    # directory walk that is still in progress.
    image_paths = list(Path("training_images").rglob("*.jpg"))

    for img_p in image_paths:
        try:
            # The context manager closes the file handle immediately; the
            # image itself is not needed, only whether it can be identified.
            with PIL.Image.open(img_p):
                pass
        except PIL.UnidentifiedImageError:
            os.remove(img_p)
            # img_p is a Path, so `img_p + "Removed"` raised TypeError;
            # format it into the message instead.
            print(f"{img_p} Removed")

    # TODO: besides the folder, the faulty entries should also be removed
    # from the dataset (which is constructed from the csv files), dict_pics,
    # temp_pics_source_list, expanded_dropd and expanded_class. But remember
    # that if you run curate.py again the same faulty images will be
    # recreated, since they are still in the raw_data.txt file.


if __name__ == "__main__":
    faulty_images()
|
Loading…
Reference in New Issue
Block a user