diff --git a/ebay_api.py b/ebay_api.py
index ff62a52..b063fe4 100644
--- a/ebay_api.py
+++ b/ebay_api.py
@@ -52,7 +52,7 @@ class FindingApi:
             "paginationInput.PageNumber":i,
             "itemFilter(0).name":"Condition",
             "itemFilter(0).value":"Used", # recommended is conditionId instead but for some reason that doesn't work either
-            # but may not be necessary anyways if you can eleminate dupes
+            # but may not be necessary anyway if you can eliminate dupes. TODO: still needs to be fixed. Used results are likely better than new items with low-quality brands and pics
             "sortOrder":"StartTimeNewest",
             "itemFilter.name":"HideDuplicateItems", # this is working
             "itemFilter.value":'true'
@@ -190,7 +190,7 @@ class ShoppingApi:
         try:
             with open('raw_data.txt') as f:
                 data = json.load(f)
-        except (FileNotFoundError, ValueError):
+        except (FileNotFoundError, ValueError): # TODO: this isn't catching the error
             data = []
 
         service_dict = {
@@ -382,35 +382,44 @@ class CurateData:
             expanded_dropd = self.extract_df(expanded_dropd) # convert lists to values
 
+            temp_pics_source_list = list(set(expanded_class.PictureURL.to_list())) # TODO because this var is deleted after dl_pictures you may be
+            # getting duplicate pictures. i.e., expanded_class.PictureURL is a master series and will write temp_pics_source_list as such,
+            # giving you many repeated PictureURLs (they will not get downloaded due to the check in dl_pictures, but the check will keep
+            # growing in compute cost). So, figure out a way to make a true temp list based on the current call
+
+        else:
+            class_training['PictureURL'] = class_training['PictureURL'].apply(lambda x: x[0])
+            expanded_class = class_training
+            dropd['PictureURL'] = dropd['PictureURL'].apply(lambda x: x[0])
+            expanded_dropd = dropd
+
+        expanded_dropd = self.extract_df(expanded_dropd) # convert lists to values
 
         temp_pics_source_list = list(set(expanded_class.PictureURL.to_list()))
-
-        try:
-            with open('temp_pics_source_list.txt') as f:
-                tpsl = json.load(f)
-                tpsl.extend(temp_pics_source_list)
-            temp_pics_source_list = list(set(tpsl))
-            with open('temp_pics_source_list.txt', 'w') as f:
-                json.dump(temp_pics_source_list, f)
-        except (ValueError, FileNotFoundError):
+        try:
+            with open('temp_pics_source_list.txt') as f:
+                tpsl = json.load(f)
+                tpsl.extend(temp_pics_source_list)
+            temp_pics_source_list = list(set(tpsl))
             with open('temp_pics_source_list.txt', 'w') as f:
                 json.dump(temp_pics_source_list, f)
+        except (ValueError, FileNotFoundError):
+            with open('temp_pics_source_list.txt', 'w') as f:
+                json.dump(temp_pics_source_list, f)
 
-        # Append to master training dataframes, drop potential dupes and save
+        # Append to master training dataframes, drop potential dupes and save
 
-        expanded_class.to_csv('expanded_class.csv')
-        # expanded_class = pd.read_csv('expanded_class.csv', index_col=0)
-        # expanded_class.drop_duplicates(subset=['PictureURL']).reset_index(drop=True)
-        # expanded_class.to_csv('expanded_class.csv', mode='a', encoding='utf-8') # TODO see line 235 about views and copies
+        expanded_class.to_csv('expanded_class.csv')
+        # expanded_class = pd.read_csv('expanded_class.csv', index_col=0)
+        # expanded_class.drop_duplicates(subset=['PictureURL']).reset_index(drop=True)
+        # expanded_class.to_csv('expanded_class.csv', mode='a', encoding='utf-8') # TODO see line 235 about views and copies
 
-        expanded_dropd.to_csv('expanded_dropd.csv')
-        # expanded_dropd = pd.read_csv('expanded_dropd.csv', index_col=0)
-        # expanded_dropd.drop_duplicates(subset=['PictureURL']).reset_index(drop=True)
-        # expanded_dropd.to_csv('expanded_dropd.csv', mode='a', encoding='utf-8')
+        expanded_dropd.to_csv('expanded_dropd.csv')
+        # expanded_dropd = pd.read_csv('expanded_dropd.csv', index_col=0)
+        # expanded_dropd.drop_duplicates(subset=['PictureURL']).reset_index(drop=True)
+        # expanded_dropd.to_csv('expanded_dropd.csv', mode='a', encoding='utf-8')
 
-            return expanded_class, expanded_dropd
-        else:
-            return
+        return expanded_class, expanded_dropd
 
     def dl_pictures(self, *args):
         '''
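
A note on the Condition TODO in the first hunk: the Finding API expects indexed itemFilter notation for every filter once more than one is in play, and the request above mixes indexed Condition params with non-indexed HideDuplicateItems params, which may be why conditionId "doesn't work". A minimal sketch of a consistently indexed params dict, with finding_params as a hypothetical helper (conditionId 3000 is eBay's code for Used):

    def finding_params(page_number):
        # Hypothetical helper: index every itemFilter so none get dropped.
        return {
            "paginationInput.PageNumber": page_number,
            "itemFilter(0).name": "Condition",
            "itemFilter(0).value": "3000",   # conditionId 3000 == Used
            "itemFilter(1).name": "HideDuplicateItems",
            "itemFilter(1).value": "true",
            "sortOrder": "StartTimeNewest",
        }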
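
On the "not catching the error" TODO in ShoppingApi: json.JSONDecodeError is a subclass of ValueError, so the existing clause should already catch an empty or malformed raw_data.txt. If an error still escapes, it is probably raised after the load, e.g. the file parses but holds something other than a list. A sketch that separates the failure modes, with load_raw_data as a hypothetical helper:

    import json

    def load_raw_data(path='raw_data.txt'):
        try:
            with open(path) as f:
                data = json.load(f)
        except FileNotFoundError:
            data = []  # first run: no cache file yet
        except json.JSONDecodeError:
            data = []  # cache file exists but is empty or corrupt
        if not isinstance(data, list):
            data = []  # file parsed but holds a non-list payload
        return data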
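
For the long TODO about temp_pics_source_list: one way to get a true temp list for the current call is to diff this call's URLs against the persisted master set and hand only the new ones to dl_pictures, so the download check never has to rescan the whole history. A sketch under that assumption (update_url_cache is a hypothetical helper, not in the codebase):

    import json

    def update_url_cache(current_urls, cache_path='temp_pics_source_list.txt'):
        # Load the master set of every PictureURL seen so far.
        try:
            with open(cache_path) as f:
                seen = set(json.load(f))
        except (FileNotFoundError, ValueError):
            seen = set()
        # Keep only URLs that are new to this call.
        new_urls = [url for url in set(current_urls) if url not in seen]
        # Persist the updated master set for the next call.
        seen.update(new_urls)
        with open(cache_path, 'w') as f:
            json.dump(list(seen), f)
        return new_urls

Called as update_url_cache(expanded_class.PictureURL.to_list()), it returns only the URLs first seen in this call.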
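
On the commented-out append logic: as written it would also discard its own work, since drop_duplicates(...).reset_index(drop=True) returns a new frame that is never assigned (likely the views-and-copies issue the TODO points at). One possible shape for the append-dedupe-save step, with append_dedup as a hypothetical helper:

    import os
    import pandas as pd

    def append_dedup(df, path, key='PictureURL'):
        # Merge new rows into the master CSV, if one exists.
        if os.path.exists(path):
            master = pd.read_csv(path, index_col=0)
            df = pd.concat([master, df], ignore_index=True)
        # Assign the result back: drop_duplicates returns a new frame.
        df = df.drop_duplicates(subset=[key]).reset_index(drop=True)
        # Rewrite the whole file rather than using mode='a', which would
        # re-append headers and old rows on every call.
        df.to_csv(path, encoding='utf-8')
        return df

append_dedup(expanded_class, 'expanded_class.csv') and append_dedup(expanded_dropd, 'expanded_dropd.csv') would then replace the two to_csv blocks.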