From 1faa4e86fd626c744f54832759dbf4c5850265fb Mon Sep 17 00:00:00 2001
From: scott
Date: Mon, 15 Nov 2021 21:01:02 -0700
Subject: [PATCH] duplicate id fixes. full data not getting fetched

---
 ebay_api.py | 73 ++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 47 insertions(+), 26 deletions(-)

diff --git a/ebay_api.py b/ebay_api.py
index 5b6183d..16aa3ac 100644
--- a/ebay_api.py
+++ b/ebay_api.py
@@ -20,13 +20,13 @@ class FindingApi:
     Methods for accessing eBay's FindingApi services
     '''
 
-    def __init__(self, service, pageNumber):
+    def __init__(self, service, target_idspc): # target ids per cat
         self.service = [
             'findItemsAdvanced', 'findCompletedItems',
             'findItemsByKeywords', 'findItemsIneBayStores', 'findItemsByCategory',
             'findItemsByProduct'
         ][service] # Currently using only index 4, i.e., service = 4
-        self.pageNumber = list(range(1, pageNumber)) # 77 pgs will give equal weights to cats given call constraints
+        self.target_idspc = target_idspc
 
     # examples of additional params you may want to add:
     # 'itemFilter(0).value':'Used' consider using this with findCompletedItems call
@@ -35,7 +35,7 @@ class FindingApi:
     # 'StartTimeNewest'
     # HideDuplicateItems
 
-    def get_data(self, category_id, i):
+    def get_data(self, category_id, target_idspc):
         '''
         Gets raw JSON data from FindingApi service call. Currently being used to
@@ -47,6 +47,8 @@ class FindingApi:
         Also consider using the exclude-duplicates param and possibly others. Research
         eBay API docs to find candidates
         '''
+        i = 1
+        ids = []
         params = {
             "OPERATION-NAME":self.service,
             "SECURITY-APPNAME":cfg.sec['SECURITY-APPNAME'],
@@ -63,16 +65,40 @@ class FindingApi:
             "itemFilter.value":'true'
         }
 
-        # TODO add try/excepts here
-        try:
-            response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
-                    params=params, timeout=4)
-            response.raise_for_status()
-
-        except requests.exceptions.RequestException:
-            print('connection error') #TODO DECIDE HOW TO HANDLE EXCEPTION
-        data = response.json()
-        return data
+        while len(ids) < target_idspc:
+            try:
+                response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
+                        params=params, timeout=7)
+                response.raise_for_status()
+
+            except requests.exceptions.RequestException: # appears this works; need to be able to continue where you left off, or use a better timeout?
+                print('connection error') #TODO DECIDE HOW TO HANDLE EXCEPTION
+                return ids
+            data = response.json()
+
+            for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
+                if item not in ids:
+                    ids.append(item['itemId'][0])
+
+            ids = list(set(ids))
+            i += 1
+            params = {
+                "OPERATION-NAME":self.service,
+                "SECURITY-APPNAME":cfg.sec['SECURITY-APPNAME'],
+                "SERVICE-VERSION":"1.13.0",
+                "RESPONSE-DATA-FORMAT":"JSON",
+                "categoryId":category_id,
+                "paginationInput.entriesPerPage":"100",
+                "paginationInput.PageNumber":i,
+                "itemFilter(0).name":"Condition",
+                "itemFilter(0).value":"Used", # conditionId is recommended instead, but for some reason that doesn't work either;
+                # may not be necessary anyway if you can eliminate dupes. TODO: still need to get this working. Used results are likely better than new items with no-name brands and poor pics
+                "sortOrder":"StartTimeNewest",
+                "itemFilter.name":"HideDuplicateItems", # this isn't working, or is only working per page
+                "itemFilter.value":'true'
+            }
+
+        return ids
 
     # TODO add some other options to the finding call API, such as filtering for used items only. This might give you a better dataset for training. Or maybe a mixture of new and used. Maybe
    # try and come up with a way to mathematically determine your odds of maximizing the number of pictures in your training set while reducing the number of useless images. Say for example, if you took a
@@ -87,27 +113,22 @@ class FindingApi:
         Creates a 20-itemId list to use for the ShoppingApi call
         '''
 
-        pages = self.pageNumber
+        target_idspc = self.target_idspc
+        itemid_results_list = []
 
         with open('cat_list.txt') as jf:
             cat_list = json.load(jf)
 
-        args = []
-        for category_id in cat_list:
-
-            bargs = [(category_id, i) for i in pages]
-            args.extend(bargs)
+        args = [(cat, target_idspc) for cat in cat_list]
 
         with concurrent.futures.ThreadPoolExecutor() as executor:
             for future in executor.map(lambda p: self.get_data(*p), args):
-                data = future
+                itemid_results_list.extend(future)
 
-                for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
-                    if item not in itemid_results_list:
-                        itemid_results_list.append(item['itemId'][0])
+        with open('raw_ids.txt', 'w') as f:
+            json.dump(itemid_results_list, f)
 
-        item_id_results = list(set(itemid_results_list))
         item_id_results = [','.join(itemid_results_list[n:n+20]) for n in list(range(0, len(itemid_results_list), 20))] # 20-ItemID list created to maximize dataset/decrease calls given call constraints
 
         return item_id_results
@@ -204,7 +225,7 @@ class ShoppingApi:
         service_dict
 
         fnd_srvc = input(str(service_dict) + "choose Finding call: (press 'enter' for default(4))")
-        pg_num = int(input('how many ids per cat? (7692 max)'))
+        target_idspc = int(input('how many ids per cat? (7692 max)'))
 
         optional_params = {
             "itemFilter(0).name":"Condition",
@@ -213,10 +234,10 @@ class ShoppingApi:
 
         if fnd_srvc != '':
             fnd_srvc = int(fnd_srvc)
-            finding = FindingApi(fnd_srvc, pg_num)
+            finding = FindingApi(fnd_srvc, target_idspc)
         else:
             fnd_srvc = 4
-            finding = FindingApi(fnd_srvc, pg_num)
+            finding = FindingApi(fnd_srvc, target_idspc)
 
         item_id_results = finding.get_ids_from_cats()
         with concurrent.futures.ThreadPoolExecutor() as executor:
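
Review note on the new while loop in get_data: it rebuilds the entire params dict on every pass just to bump the page number; the "if item not in ids" test compares a full item dict against a list of id strings, so it never fires and the later list(set(ids)) is doing all of the dedup work; and the loop has no exit when a category holds fewer than target_idspc ids. Two parameter names are also worth checking against eBay's Finding API docs: the documented pagination field is paginationInput.pageNumber (lowercase p), and when more than one itemFilter is sent every filter must carry an index, so the unindexed itemFilter.name/HideDuplicateItems pair sitting next to itemFilter(0) is probably being ignored, which would explain the "only working per page" comment. Below is a minimal standalone sketch of how the loop could look with those points addressed. It is not this repo's code: fetch_category_ids, FINDING_URL, and the app_id parameter are illustrative names, and the pageNumber/itemFilter(1) changes are assumptions to verify against the docs.

import requests

FINDING_URL = "https://svcs.ebay.com/services/search/FindingService/v1"

def fetch_category_ids(app_id, category_id, target_idspc):
    '''Collects up to target_idspc unique itemIds for one category.

    Assumes the findItemsByCategory response shape this patch already parses.
    '''
    ids = set()
    page = 1
    while len(ids) < target_idspc:
        params = {
            "OPERATION-NAME": "findItemsByCategory",
            "SECURITY-APPNAME": app_id,
            "SERVICE-VERSION": "1.13.0",
            "RESPONSE-DATA-FORMAT": "JSON",
            "categoryId": category_id,
            "paginationInput.entriesPerPage": "100",
            "paginationInput.pageNumber": str(page),  # documented name; the patch's PageNumber may be ignored
            "itemFilter(0).name": "Condition",
            "itemFilter(0).value": "Used",
            "itemFilter(1).name": "HideDuplicateItems",  # indexed, unlike the patch's bare itemFilter.name
            "itemFilter(1).value": "true",
            "sortOrder": "StartTimeNewest",
        }
        try:
            response = requests.get(FINDING_URL, params=params, timeout=7)
            response.raise_for_status()
        except requests.exceptions.RequestException:
            break  # connection trouble: keep whatever was collected so far
        result = response.json()['findItemsByCategoryResponse'][0]['searchResult'][0]
        items = result.get('item', [])
        if not items:
            break  # category exhausted before reaching target_idspc
        before = len(ids)
        ids.update(item['itemId'][0] for item in items)  # compare ids, not whole item dicts
        if len(ids) == before:
            break  # page added nothing new; stop instead of spinning forever
        page += 1
    return list(ids)[:target_idspc]

With a helper shaped like this, get_ids_from_cats() keeps its executor.map fan-out unchanged and simply extends itemid_results_list with each returned list, as the patch already does.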