commit before push

spbeach46 2021-06-10 01:53:36 -07:00
parent fc7449827c
commit 2c5b878977


@@ -52,7 +52,7 @@ class FindingApi:
"paginationInput.PageNumber":i,
"itemFilter(0).name":"Condition",
"itemFilter(0).value":"Used", # recommended is conditionId instead but for some reason that doesn't work either
# but may not be necessary anyways if you can eleminate dupes
# but may not be necessary anyways if you can eleminate dupes. TODO Still need to fix to work. Results are likely better than new items w/ shitty brands and pics
"sortOrder":"StartTimeNewest",
"itemFilter.name":"HideDuplicateItems", # this is working
"itemFilter.value":'true'
@@ -190,7 +190,7 @@ class ShoppingApi:
try:
with open('raw_data.txt') as f:
data = json.load(f)
except (FileNotFoundError, ValueError):
except (FileNotFoundError, ValueError): # TODO: this is not catching the error
data = []
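# Sketch (not part of this commit) for the TODO above: json.load raises
# json.JSONDecodeError, a subclass of ValueError, so the except clause should already
# cover a corrupt or empty file. If the error still isn't caught, it is probably being
# raised outside this try block; catching and logging it explicitly makes the failure
# visible. The helper name below is made up for illustration.
import json

def load_raw_data(path='raw_data.txt'):
    try:
        with open(path) as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"{path} not found; starting with an empty list")
    except json.JSONDecodeError as err:
        print(f"{path} is not valid JSON ({err}); starting with an empty list")
    return []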
service_dict = {
@@ -382,35 +382,44 @@ class CurateData:
expanded_dropd = self.extract_df(expanded_dropd) # convert lists to values
temp_pics_source_list = list(set(expanded_class.PictureURL.to_list())) # TODO because this var is deleted after dl_pictures you may be
# getting duplicate pictures. i.e., expanded_class.PictureURL is a master series and will write temp_pics_source_list as such,
# giving you many repeated PictureURLs (they will not get downloaded due to the check at dl_pic, but the checking will continue to grow
# in compute requirements). So, figure out a way to make a true temp list based only on the current call being executed
else:
class_training['PictureURL'] = class_training['PictureURL'].apply(lambda x: x[0])
expanded_class = class_training
dropd['PictureURL'] = dropd['PictureURL'].apply(lambda x: x[0])
expanded_dropd = dropd
expanded_dropd = self.extract_df(expanded_dropd) # convert lists to values
temp_pics_source_list = list(set(expanded_class.PictureURL.to_list()))
try:
with open('temp_pics_source_list.txt') as f:
tpsl = json.load(f)
tpsl.extend(temp_pics_source_list)
temp_pics_source_list = list(set(tpsl))
with open('temp_pics_source_list.txt', 'w') as f:
json.dump(temp_pics_source_list, f)
except (ValueError, FileNotFoundError):
try:
with open('temp_pics_source_list.txt') as f:
tpsl = json.load(f)
tpsl.extend(temp_pics_source_list)
temp_pics_source_list = list(set(tpsl))
with open('temp_pics_source_list.txt', 'w') as f:
json.dump(temp_pics_source_list, f)
except (ValueError, FileNotFoundError):
with open('temp_pics_source_list.txt', 'w') as f:
json.dump(temp_pics_source_list, f)
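# Sketch (not part of this commit) of the "true temp list" idea from the TODO above:
# keep the master URL list on disk and only queue URLs from the current call that are
# not already recorded, so dl_pictures never has to re-check the whole master series.
# The helper name is made up for illustration; the file name follows the code above.
import json, os

def new_urls_this_call(current_urls, master_path='temp_pics_source_list.txt'):
    master = set()
    if os.path.exists(master_path):
        with open(master_path) as f:
            try:
                master = set(json.load(f))
            except ValueError:
                pass                              # treat a corrupt file as empty
    fresh = sorted(set(current_urls) - master)    # only URLs this call hasn't seen yet
    with open(master_path, 'w') as f:
        json.dump(sorted(master | set(fresh)), f)
    return fresh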
# Append to master training dataframes, drop potential dupes and save
expanded_class.to_csv('expanded_class.csv')
# expanded_class = pd.read_csv('expanded_class.csv', index_col=0)
# expanded_class.drop_duplicates(subset=['PictureURL']).reset_index(drop=True)
# expanded_class.to_csv('expanded_class.csv', mode='a', encoding='utf-8') # TODO see line 235 about views and copies
expanded_dropd.to_csv('expanded_dropd.csv')
# expanded_dropd = pd.read_csv('expanded_dropd.csv', index_col=0)
# expanded_dropd.drop_duplicates(subset=['PictureURL']).reset_index(drop=True)
# expanded_dropd.to_csv('expanded_dropd.csv', mode='a', encoding='utf-8')
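# Sketch (not part of this commit) of the append-and-dedupe step in the commented-out
# lines above: drop_duplicates returns a new frame, so the result has to be assigned
# back (or written out) or the dedupe is silently lost. The helper name is made up;
# file and column names follow the code above.
import os
import pandas as pd

def append_to_master(new_df, path):
    if os.path.exists(path):
        master = pd.read_csv(path, index_col=0)
        master = pd.concat([master, new_df], ignore_index=True)
    else:
        master = new_df
    master = master.drop_duplicates(subset=['PictureURL']).reset_index(drop=True)
    master.to_csv(path, encoding='utf-8')
    return master

# e.g. append_to_master(expanded_class, 'expanded_class.csv')
#      append_to_master(expanded_dropd, 'expanded_dropd.csv')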
return expanded_class, expanded_dropd
else:
return
return expanded_class, expanded_dropd
def dl_pictures(self, *args):
'''