From 66504687561c55c58b818991de0e5ded512c684f Mon Sep 17 00:00:00 2001
From: scott
Date: Fri, 31 Dec 2021 15:30:06 -0700
Subject: [PATCH] replaced FindingApi for easy refining of cat search

---
 ebay_api.py | 134 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 134 insertions(+)

diff --git a/ebay_api.py b/ebay_api.py
index 26e3060..2176816 100644
--- a/ebay_api.py
+++ b/ebay_api.py
@@ -22,6 +22,140 @@ from ebaysdk.trading import Connection as Trading
 from ebaysdk.finding import Connection as Finding
 from ebaysdk.shopping import Connection as Shopping
 
+class FindingApi:
+    '''
+    Methods for accessing eBay's Finding API services.
+    '''
+
+    def __init__(self, service, idspc):
+        self.service = [
+            'findItemsAdvanced', 'findCompletedItems',
+            'findItemsByKeywords', 'findItemsIneBayStores', 'findItemsByCategory',
+            'findItemsByProduct'
+        ][service]  # currently only index 4 (findItemsByCategory) is used
+        self.idspc = idspc  # target number of itemIds to collect per category
+        # Additional params worth adding later:
+        # 'itemFilter(0).value':'Used' pairs well with a findCompletedItems call
+        # 'itemFilter(1).name':'ListingType'
+        # 'itemFilter(1).value':'AuctionWithBIN'
+        # 'sortOrder':'StartTimeNewest'
+        # 'HideDuplicateItems'
+
+    def get_data(self, category_id, idspc):
+        '''
+        Gets raw JSON data from a Finding API service call. Currently used to
+        collect itemIds from categories.
+        '''
+        ids = []
+        modTimeFrom = datetime.now() - timedelta(seconds=5)  # initialize modTimeFrom value
+        data = None
+        itemSearchURL = None
+        i = 1
+        params = {
+            "OPERATION-NAME": self.service,
+            "SECURITY-APPNAME": cfg.sec['SECURITY-APPNAME'],
+            "SERVICE-VERSION": "1.13.0",
+            "RESPONSE-DATA-FORMAT": "JSON",
+            "categoryId": category_id,
+            "paginationInput.entriesPerPage": "20",
+            "paginationInput.PageNumber": i,
+            "itemFilter(0).name": "Condition",
+            "itemFilter(0).value": "Used",
+            "itemFilter.name": "HideDuplicateItems",
+            "itemFilter.value": "true",
+            "sortOrder": "StartTimeNewest",
+        }
+
+#        "itemFilter(2).name":"modTimeFrom",
+#        "itemFilter(2).value":modTimeFrom,
+
+        while len(ids) < idspc:
+
+            try:
+                response = requests.get(
+                    "https://svcs.ebay.com/services/search/FindingService/v1",
+                    params=params, timeout=24)
+                response.raise_for_status()
+
+            except requests.exceptions.RequestException:
+                # TODO resume where we left off instead of bailing out, or use a
+                # better timeout/retry strategy
+                print('connection error')
+                return ids, data, modTimeFrom, itemSearchURL
+
+            try:
+                data = response.json()
+                result = data['findItemsByCategoryResponse'][0]
+                itemSearchURL = result['itemSearchURL'][0]
+                # track the startTime of the oldest item on the page so a later
+                # call can pick up where this one stopped
+                modTimeFrom = result['searchResult'][0]['item'][-1]['listingInfo'][0]['startTime'][0]
+                modTimeFrom = dateutil.parser.isoparse(modTimeFrom)
+                modTimeFrom = modTimeFrom - timedelta(seconds=5)  # TODO convert back to GMT format
+                for item in result['searchResult'][0]['item']:
+                    ids.append(item['itemId'][0])
+
+            except (AttributeError, KeyError):
+                print('AttributeError or KeyError. Exiting')
+                print(response.json())
+                return ids, data, modTimeFrom, itemSearchURL
+
+            input('press enter to continue')  # debug pause; remove for unattended runs
+            i += 1
+            params["paginationInput.PageNumber"] = i  # only the page number changes between calls
+
+        return ids, data, modTimeFrom, itemSearchURL
+
+# TODO add other options to the Finding API call, such as filtering for used
+# items only; that might give a better dataset for training, or perhaps a
+# mixture of new and used. Try to come up with a way to mathematically estimate
+# the odds of maximizing the number of useful pictures in the training set while
+# reducing useless images. For example, taking a random 3 of the 8 pictures per
+# listing might improve the chance of getting 3 good pictures while also growing
+# the training set; limiting it to the first 5 pictures instead of a random
+# sample might work even better.
+
+# Used shoes may also be more consistent, since they are "one-off" items without
+# confusing multiple variations and colors. Another option is to run small
+# training sets on both new and used to see which is more accurate, or whether a
+# combination of both is.
+
+    def get_ids_from_cats(self):  # TODO resolve duplicates here to maximize unique ids/data per ShoppingApi call
+        '''
+        Creates 20-itemId strings to use for the ShoppingApi call.
+        '''
+        idspc = self.idspc
+
+        itemid_results_list = []
+
+        with open('cat_list.txt') as jf:
+            cat_list = json.load(jf)
+
+        args = [(cat, idspc) for cat in cat_list]
+
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            # get_data returns (ids, data, modTimeFrom, itemSearchURL); only the
+            # ids are collected here
+            for ids, *_ in executor.map(lambda p: self.get_data(*p), args):
+                itemid_results_list.extend(ids)
+
+        print(len(itemid_results_list))
+        print(len(set(itemid_results_list)))  # count of unique ids collected
+        input('press enter to continue')  # debug pause; remove for unattended runs
+
+        with open('raw_ids.txt', 'w') as f:
+            json.dump(itemid_results_list, f)
+
+        # 20-itemId strings created to maximize the dataset per call given the
+        # ShoppingApi's 20-item-per-request constraint
+        item_id_results = [','.join(itemid_results_list[n:n + 20])
+                           for n in range(0, len(itemid_results_list), 20)]
+        return item_id_results, itemid_results_list
+
 class ShoppingApi:
     '''
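
A minimal usage sketch of the new class (not part of the patch above). It
assumes ebay_api.py's existing imports provide cfg with a valid Finding API
'SECURITY-APPNAME', and that cat_list.txt holds a JSON list of eBay category
IDs; the variable names below are illustrative only:

    from ebay_api import FindingApi

    # service=4 selects 'findItemsByCategory'; idspc is the per-category itemId target
    finding = FindingApi(service=4, idspc=20)

    # get_ids_from_cats() fans get_data() out over the categories in
    # cat_list.txt, dumps the raw ids to raw_ids.txt, and returns comma-joined
    # 20-id strings sized for the ShoppingApi's per-call limit
    batches, raw_ids = finding.get_ids_from_cats()
    print(f'{len(raw_ids)} itemIds collected in {len(batches)} batches')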