replaced FindingApi for easier refining of category search

scott 2021-12-31 15:30:06 -07:00
parent 5ec46ae0c7
commit 6650468756


@ -22,6 +22,140 @@ from ebaysdk.trading import Connection as Trading
from ebaysdk.finding import Connection as Finding
from ebaysdk.shopping import Connection as Shopping
class FindingApi:
    '''
    Methods for accessing eBay's FindingApi services
    '''
    def __init__(self, service, idspc):
        self.service = [
            'findItemsAdvanced', 'findCompletedItems',
            'findItemsByKeywords', 'findItemsIneBayStores', 'findItemsByCategory',
            'findItemsByProduct'
        ][service]  # currently using only index 4, i.e., service = 4
        self.idspc = idspc  # target number of itemIds per category
        # Examples of additional params you may want to add:
        #   'itemFilter(0).value':'Used'           # consider using with a findCompletedItems call
        #   'itemFilter(1).name':'ListingType'
        #   'itemFilter(1).value':'AuctionWithBIN'
        #   'sortOrder':'StartTimeNewest'
        #   'itemFilter(n).name':'HideDuplicateItems'
    def get_data(self, category_id, idspc):
        '''
        Gets raw JSON data from a FindingApi service call. Currently being used
        to get itemIds from categories.
        '''
        # startTime = dateutil.parser.isoparse(startTime)
        # now = datetime.datetime.now(tz=pytz.UTC)
        # days_on_site = (now - startTime).days  # as int
        ids = []
        data = {}             # initialized so every return path shares one shape
        itemSearchURL = None
        modTimeFrom = datetime.now() - timedelta(seconds=5)  # initialize modTimeFrom value
        i = 1
        params = {
            "OPERATION-NAME": self.service,
            "SECURITY-APPNAME": cfg.sec['SECURITY-APPNAME'],
            "SERVICE-VERSION": "1.13.0",
            "RESPONSE-DATA-FORMAT": "JSON",
            "categoryId": category_id,
            "paginationInput.entriesPerPage": "20",
            "paginationInput.pageNumber": i,  # lowercase 'p': the Finding API param is pageNumber
            "itemFilter(0).name": "Condition",
            "itemFilter(0).value": "Used",
            "itemFilter(1).name": "HideDuplicateItems",  # item filters must be indexed: itemFilter(n).name/.value
            "itemFilter(1).value": "true",
            "sortOrder": "StartTimeNewest",
        }
        # "itemFilter(2).name": "modTimeFrom",
        # "itemFilter(2).value": modTimeFrom,
        while len(ids) < idspc:
            try:
                response = requests.get(
                    "https://svcs.ebay.com/services/search/FindingService/v1",
                    params=params, timeout=24)
                response.raise_for_status()
            except requests.exceptions.RequestException:
                # TODO: resume where we left off instead of bailing, or use a
                # better timeout/retry strategy
                print('connection error')
                return ids, data, modTimeFrom, itemSearchURL
            try:
                data = response.json()
                itemSearchURL = data['findItemsByCategoryResponse'][0]['itemSearchURL'][0]
                modTimeFrom = data['findItemsByCategoryResponse'][0]['searchResult'][0]['item'][-1]['listingInfo'][0]['startTime'][0]
                modTimeFrom = dateutil.parser.isoparse(modTimeFrom)
                modTimeFrom = modTimeFrom - timedelta(seconds=5)  # TODO: convert back to GMT format
                for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
                    # if item not in ids:
                    ids.append(item['itemId'][0])
                # ids = list(set(ids))
            except (AttributeError, KeyError):
                print('AttributeError or KeyError. Exiting')
                print(response.json())
                input('press enter to continue')  # moved before return; it was unreachable after it
                return ids, data, modTimeFrom, itemSearchURL
            i += 1
            params["paginationInput.pageNumber"] = i  # only the page number changes between requests
        return ids, data, modTimeFrom, itemSearchURL
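    # Hypothetical single-category usage (not in this commit; '177831' is a
    # made-up categoryId for illustration):
    #
    #   api = FindingApi(4, 40)  # index 4 -> 'findItemsByCategory'
    #   ids, data, mod_time, search_url = api.get_data('177831', 40)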
    # TODO: add other options to the Finding API call, such as filtering for
    # used items only. That might give a better dataset for training, or maybe
    # a mixture of new and used would. Try to come up with a way to
    # mathematically determine the odds of maximizing the number of useful
    # pictures in the training set while reducing the number of useless images.
    # For example, taking a random set of 3 of 8 total pictures from each
    # listing might give a better chance of getting 3 good pictures while also
    # growing the training set; alternatively, limiting it to the first 5
    # pictures instead of a random sample may work better. Used shoes may even
    # be more consistent since they are "one-off" listings without confusing
    # multiple variations and colors. Another option is to run small training
    # sets on both new and used to see which one is more accurate, or whether
    # a combination of both is.
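    # A sketch of the sampling idea above (hypothetical helper, not part of
    # this commit; 'pic_urls' is assumed to be a listing's list of picture URLs):
    #
    #   import random
    #
    #   def sample_pics(pic_urls, k=3, first_n=None):
    #       '''Return the first first_n pictures, or a random k-subset.'''
    #       if first_n is not None:
    #           return pic_urls[:first_n]
    #       return random.sample(pic_urls, min(k, len(pic_urls)))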
    def get_ids_from_cats(self):
        # TODO: resolve duplicates here to maximize unique ids/data for the ShoppingApi call
        '''
        Creates 20-itemId strings to use for the ShoppingApi call
        '''
        # target_idspc = self.target_idspc
        idspc = self.idspc
        itemid_results_list = []
        with open('cat_list.txt') as jf:
            cat_list = json.load(jf)
        # the redundant outer 'for cat in cat_list' loop is gone; the
        # comprehension already covers every category
        args = [(cat, idspc) for cat in cat_list]
        with concurrent.futures.ThreadPoolExecutor() as executor:
            for ids, _data, _mod_time, _search_url in executor.map(
                    lambda p: self.get_data(*p), args):
                itemid_results_list.extend(ids)  # get_data returns a 4-tuple; only the ids are collected
        print(len(itemid_results_list))
        a = list(set(itemid_results_list))
        print(len(a))
        input('press enter to continue')
        with open('raw_ids.txt', 'w') as f:
            json.dump(itemid_results_list, f)
        # 20-itemId strings created to maximize the dataset and minimize calls
        # given the ShoppingApi call constraints
        item_id_results = [','.join(itemid_results_list[n:n + 20])
                           for n in range(0, len(itemid_results_list), 20)]
        return item_id_results, itemid_results_list
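# Hypothetical driver (not part of this commit) showing how the pieces fit
# together; the 100-ids-per-category target is made up for illustration:
#
#   finding = FindingApi(4, 100)  # service index 4 -> 'findItemsByCategory'
#   twenty_id_strs, raw_ids = finding.get_ids_from_cats()
#   for id_str in twenty_id_strs:
#       ...  # each 'id1,...,id20' string feeds one ShoppingApi request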
class ShoppingApi:
'''