From eb780faf407f284509e6c52d7b3221e4b0fae2b6 Mon Sep 17 00:00:00 2001
From: spbeach46
Date: Sun, 21 Nov 2021 16:05:20 -0700
Subject: [PATCH] modTimeFrom w dateutil datetime timedelta parser integration

---
 ebay_api.py | 104 +++++++++++++++++++++++++++++-----------------------
 1 file changed, 59 insertions(+), 45 deletions(-)

diff --git a/ebay_api.py b/ebay_api.py
index 59e17b3..cc26bd6 100644
--- a/ebay_api.py
+++ b/ebay_api.py
@@ -1,4 +1,8 @@
 import os
+from datetime import datetime, timedelta
+import dateutil
+from dateutil import parser
+import pytz
 import pdb
 from io import StringIO
 import numpy as np
@@ -21,56 +25,53 @@ class FindingApi:
     Methods for accessing eBay's FindingApi services
     '''

-    def __init__(self, service, target_idspc): #target ids per cat
+    def __init__(self, service, idspc):
         self.service = [
             'findItemsAdvanced', 'findCompletedItems',
             'findItemsByKeywords', 'findItemsIneBayStores', 'findItemsByCategory',
             'findItemsByProduct'
         ][service]  # Currently using only index 4, i.e., service = 4
-        self.target_idspc = target_idspc
-
-        # examples of additional params you may want to add:
+        self.idspc = idspc  # examples of additional params you may want to add:
         # 'itemFilter(0).value':'Used' consider using this with findCompletedItems call
         # 'itemFilter(1).name':'ListingType'
         # 'itemFilter(1).value':'AuctionWithBIN'
         # 'StartTimeNewest'
         # HideDuplicateItems

-    def get_data(self, category_id, target_idspc):
+    def get_data(self, category_id, idspc):
         '''
         Gets raw JSON data fom FindingApi service call. Currently being used to
         get itemIDs from categories;
         '''
+#        startTime = dateutil.parser.isoparse( startTime )
+#        now = datetime.datetime.now(tz=pytz.UTC)
+#        days_on_site = (now - startTime).days # as int

-        '''
-        consider using the sortOrder param to update by the latest listings first.
-        Also consider using the exlude duplicates param and possibly others.
-        research ebay api docs to find cadidates
-        '''
-        i = 1
         ids = []
+        modTimeFrom = datetime.now() - timedelta(seconds=5)  # initialize modTimeFrom value
+        i = 1
         params = {
            "OPERATION-NAME":self.service,
            "SECURITY-APPNAME":cfg.sec['SECURITY-APPNAME'],
            "SERVICE-VERSION":"1.13.0",
            "RESPONSE-DATA-FORMAT":"JSON",
            "categoryId":category_id,
-           "paginationInput.entriesPerPage":"100",
+           "paginationInput.entriesPerPage":"20",
            "paginationInput.PageNumber":i,
            "itemFilter(0).name":"Condition",
-           "itemFilter(0).value":"Used", # recommended is conditionId instead but for some reason that doesn't work either
-           # but may not be necessary anyways if you can eleminate dupes. TODO Still need to fix to work. Results are likely better than new items w/ shitty brands and pics
+           "itemFilter(0).value":"Used",
+           "itemFilter.name":"HideDuplicateItems",
+           "itemFilter.value":"true",
            "sortOrder":"StartTimeNewest",
-           "itemFilter.name":"HideDuplicateItems", # this isn't working or is only working per page
-           "itemfilter.value":'true'
            }

-        while len(ids) < target_idspc: # target_ids(per category)
+#           "itemFilter.name(2)":"modTimeFrom",
+#           "itemFilter.value(2)":modTimeFrom,
+
+        while len(ids) < idspc:
+
             try:
-                print(len(ids))
-                print(params['paginationInput.PageNumber'])
-                input('press enter to continue')
                 response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
                         params=params, timeout=24)
                 response.raise_for_status()
@@ -80,33 +81,39 @@ class FindingApi:
                 return ids
             try:
                 data = response.json()
-
+                itemSearchURL = data['findItemsByCategoryResponse'][0]['itemSearchURL'][0]
+                modTimeFrom = data['findItemsByCategoryResponse'][0]['searchResult'][0]['item'][-1]['listingInfo'][0]['startTime'][0]
+                modTimeFrom = dateutil.parser.isoparse( modTimeFrom )
+                modTimeFrom = modTimeFrom - timedelta(seconds=5) # TODO NEED BACK TO GMT FORMAT
                 for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
 #                    if item not in ids:
                     ids.append(item['itemId'][0])
-#                ids = list(set(ids))
-                i += 1
-                params = {
-                   "OPERATION-NAME":self.service,
-                   "SECURITY-APPNAME":cfg.sec['SECURITY-APPNAME'],
-                   "SERVICE-VERSION":"1.13.0",
-                   "RESPONSE-DATA-FORMAT":"JSON",
-                   "categoryId":category_id,
-                   "paginationInput.entriesPerPage":"100",
-                   "paginationInput.PageNumber":i,
-                   "itemFilter(0).name":"Condition",
-                   "itemFilter(0).value":"Used", # recommended is conditionId instead but for some reason that doesn't work either
-                   # but may not be necessary anyways if you can eleminate dupes. TODO Still need to fix to work. Results are likely better than new items w/ shitty brands and pics
-                   "sortOrder":"StartTimeNewest",
-                   "itemFilter.name":"HideDuplicateItems", # this isn't working or is only working per page
-                   "itemfilter.value":'true'
-                   }
+                    #ids = list(set(ids))
+
             except (AttributeError, KeyError):
                 print('AttributeError or KeyError. Exiting')
+                print(response.json())
                 return ids
-        return ids, data
+            input('press enter to continue')
+            i+=1
+            params = {
+               "OPERATION-NAME":self.service,
+               "SECURITY-APPNAME":cfg.sec['SECURITY-APPNAME'],
+               "SERVICE-VERSION":"1.13.0",
+               "RESPONSE-DATA-FORMAT":"JSON",
+               "categoryId":category_id,
+               "paginationInput.entriesPerPage":"20",
+               "paginationInput.PageNumber":i,
+               "itemFilter(0).name":"Condition",
+               "itemFilter(0).value":"Used",
+               "itemFilter.name":"HideDuplicateItems",
+               "itemFilter.value":"true",
+               "sortOrder":"StartTimeNewest",
+               }
+
+        return ids, data, modTimeFrom, itemSearchURL

 # TODO add some other options to finding call api such as for possibly filtering for used items only. This might give you a better dataset for training. Or maybe a mixture of new and used. Maybe
 # try and come up with a way to mathematically determine your odds of maximizing the number of pictures in your training set while reducing the number of useless images. Say for example, if you took a
@@ -121,25 +128,32 @@ class FindingApi:
         Creates a 20-itemId list to use for the ShoppingApi call
         '''
-        target_idspc = self.target_idspc
+#        target_idspc = self.target_idspc
+        idspc = self.idspc

         itemid_results_list = []

         with open('cat_list.txt') as jf:
             cat_list = json.load(jf)

-        args = [(cat, target_idspc) for cat in cat_list]
+        for cat in cat_list:
+            args = [(cat, idspc) for cat in cat_list]

-        with concurrent.futures.ThreadPoolExecutor() as executor:
-            for future in executor.map(lambda p: self.get_data(*p), args):
-                itemid_results_list.extend(future)
+            with concurrent.futures.ThreadPoolExecutor() as executor:
+                for future in executor.map(lambda p: self.get_data(*p), args):
+                    itemid_results_list.extend(future)
+
+        print(len(itemid_results_list))
+        a = list(set(itemid_results_list))
+        print(len(a))
+        input('press enter to continue')

         with open('raw_ids.txt', 'w') as f:
             json.dump(itemid_results_list, f)

         item_id_results = [','.join(itemid_results_list[n:n+20]) for n in list(range(0,
             len(itemid_results_list), 20))] # 20-ItemID list created to maximize dataset/decrease calls given call constraints

-        return item_id_results
+        return item_id_results, itemid_results_list

 # TODO during your try except conditionals just check the csv files. At the end you can create sets. You can creat another condition that says if the final set is smaller than 100k then you can call finding
 # service on more pages (but only pages you haven't tried) and repeat the search process.
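
Note on the "TODO NEED BACK TO GMT FORMAT" left in get_data: a minimal sketch of one way to round-trip the startTime value, assuming the Finding API accepts modTimeFrom as an ISO-8601 UTC string of the form YYYY-MM-DDTHH:MM:SS.fffZ (the same shape the API returns for startTime). The helper name to_ebay_gmt is hypothetical and not part of this patch; it only reuses the dateutil/pytz/timedelta pieces the patch already imports.

    # Hypothetical helper; assumes eBay's ISO-8601 UTC ("Zulu") timestamp format.
    from datetime import timedelta
    import dateutil.parser
    import pytz

    def to_ebay_gmt(dt):
        # Convert an aware or naive datetime to a UTC ISO-8601 string ending in 'Z'.
        if dt.tzinfo is None:
            dt = pytz.UTC.localize(dt)
        return dt.astimezone(pytz.UTC).strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z'

    # Example: parse a startTime string, back off 5 seconds, re-serialize.
    start = dateutil.parser.isoparse('2021-11-21T23:05:20.000Z')
    mod_time_from = to_ebay_gmt(start - timedelta(seconds=5))  # '2021-11-21T23:05:15.000Z'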