diff --git a/ebay_api.py b/ebay_api.py
index 92ede92..87c9acf 100644
--- a/ebay_api.py
+++ b/ebay_api.py
@@ -27,6 +27,8 @@ class FindingApi:
     # 'itemFilter(0).value':'Used' consider using this with findCompletedItems call
     # 'itemFilter(1).name':'ListingType'
     # 'itemFilter(1).value':'AuctionWithBIN'
+    # 'StartTimeNewest'
+    # HideDuplicateItems
 
     def get_data(self, category_id, i):
 
@@ -37,7 +39,8 @@ class FindingApi:
 
         '''
         consider using the sortOrder param to update by the latest listings first.
-        Also consider using the exlude duplicates param
+        Also consider using the exclude-duplicates param and possibly others.
+        Research the eBay API docs to find candidates.
         '''
         params = {
            "OPERATION-NAME":self.service,
@@ -48,7 +51,10 @@ class FindingApi:
            "paginationInput.entriesPerPage":"100",
            "paginationInput.PageNumber":i,
            "itemFilter(0).name":"Condition",
-           "itemFilter(0).value":"Used"
+           "itemFilter(0).value":"Used",
+           "itemFilter(1).name":"HideDuplicateItems",
+           "itemFilter(1).value":"true",
+           "sortOrder":"StartTimeNewest"
            }
 
        # TODO add try excepts here
@@ -81,28 +87,19 @@ class FindingApi:
         with open('cat_list.txt') as jf:
             cat_list = json.load(jf)
 
+        args = []
         for category_id in cat_list:
-            args = [(category_id, i) for i in pages] # NOTE alternatively you can use args.extend(args) to create master list of tuples with all cats
-            # instead of running concurrent.futures.ThreadPoolExecutor in a loop. Might be faster
+            bargs = [(category_id, i) for i in pages]
+            args.extend(bargs)
 
-            with concurrent.futures.ThreadPoolExecutor() as executor:
-                for future in executor.map(lambda p: self.get_data(*p), args):
-                    data = future
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            for future in executor.map(lambda p: self.get_data(*p), args):
+                data = future
 
-            '''
-            These try excepts may be unnecessary.
-            '''
-            try: # TODO if conditionals are not working due to each thread checking the same unedited item_id_results list
-                training = pd.read_csv('training.csv')
-                for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
-                    if (item not in training.values) and (item not in itemid_results_list): # might not be required
-                        itemid_results_list.append(item['itemId'][0])
-
-            except (pd.errors.EmptyDataError, FileNotFoundError):
-                for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
-                    if item not in itemid_results_list:
-                        itemid_results_list.append(item['itemId'][0])
+                for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
+                    if item['itemId'][0] not in itemid_results_list:
+                        itemid_results_list.append(item['itemId'][0])
 
         item_id_results = list(set(itemid_results_list))
         item_id_results = [','.join(itemid_results_list[n:n+20]) for n in list(range(0,
@@ -416,6 +413,10 @@ class CurateData:
         and creates custom {source:target} dictionary as dict_pics
         '''
 
+        # TODO add option to include only the first image of each listing, as
+        # the others may be too low-quality for training. Also consider adding
+        # an option to reduce the size of each downloaded pic.
+
        try:
            with open('target_dirs.txt', 'r+') as f: # TODO you can add option to change directory here, too. Look up how to have optional arguments
                target_dir = json.load(f)
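
Review note on the params hunk: eBay's Finding API passes item filters as indexed `itemFilter(n)` name/value pairs, so `HideDuplicateItems` needs its own index and a value of `"true"`; reusing index 0 as a duplicate dict key would silently replace the Condition filter. A minimal sketch of the corrected request, assuming the `requests` library and a hypothetical `app_id` (endpoint and parameter names are from the public Finding API docs):

```python
import requests

def find_newest_used(app_id, category_id, page):
    """Sketch: one findItemsByCategory call with indexed item filters."""
    params = {
        "OPERATION-NAME": "findItemsByCategory",
        "SECURITY-APPNAME": app_id,          # your Finding API app ID
        "SERVICE-VERSION": "1.13.0",
        "RESPONSE-DATA-FORMAT": "JSON",
        "categoryId": category_id,
        "paginationInput.entriesPerPage": "100",
        "paginationInput.pageNumber": page,
        # each filter gets its own index; reusing (0) clobbers Condition
        "itemFilter(0).name": "Condition",
        "itemFilter(0).value": "Used",
        "itemFilter(1).name": "HideDuplicateItems",
        "itemFilter(1).value": "true",
        "sortOrder": "StartTimeNewest",
    }
    response = requests.get(
        "https://svcs.ebay.com/services/search/FindingService/v1",
        params=params, timeout=24)
    response.raise_for_status()
    return response.json()
```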
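The threading refactor builds one master list of (category_id, page) tuples and fans out over a single executor. Worth noting that `executor.map` yields results, not `Future` objects, so `data = future` is already the parsed response. A self-contained sketch of the pattern, assuming a hypothetical `get_data(category_id, page)` that returns the parsed JSON, with the dedup check done on itemId strings rather than on the raw item dicts:

```python
import concurrent.futures

def collect_item_ids(get_data, cat_list, pages):
    """Sketch: flatten (category, page) work items, fan out once, dedup ids."""
    args = [(cat, page) for cat in cat_list for page in pages]
    seen = set()                 # membership test on id strings, not dicts
    item_ids = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # executor.map yields results in order; each "data" is one parsed page
        for data in executor.map(lambda p: get_data(*p), args):
            items = data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']
            for item in items:
                item_id = item['itemId'][0]
                if item_id not in seen:
                    seen.add(item_id)
                    item_ids.append(item_id)
    return item_ids
```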
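The trailing context line chunks ids into comma-joined strings of 20, which lines up with the Shopping API's GetMultipleItems limit of 20 ItemIDs per call. A sketch of that batching step (the helper name is illustrative); note the unchanged code joins `itemid_results_list` rather than the deduplicated `item_id_results`, so the dedup is effectively discarded:

```python
def batch_ids(item_ids, batch_size=20):
    """Sketch: comma-join ids in groups of batch_size for GetMultipleItems."""
    return [','.join(item_ids[n:n + batch_size])
            for n in range(0, len(item_ids), batch_size)]

# usage: batch_ids(['110001', '110002', '110003'], batch_size=2)
# -> ['110001,110002', '110003']
```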
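On the CurateData TODO about shrinking each downloaded pic: a hedged sketch using Pillow's `Image.thumbnail`, which this repo may or may not already depend on; the function name and the `max_px` default are illustrative, not from the source:

```python
from io import BytesIO

import requests
from PIL import Image

def download_scaled(url, target_path, max_px=224):
    """Sketch: fetch one listing image and cap its longest side at max_px."""
    response = requests.get(url, timeout=24)
    response.raise_for_status()
    img = Image.open(BytesIO(response.content))
    img.thumbnail((max_px, max_px))   # in-place, preserves aspect ratio
    img.save(target_path)
```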