duplicate id fixes. full data not getting fetched

parent 6bdc94c8c6
commit 1faa4e86fd

ebay_api.py | 73
@@ -20,13 +20,13 @@ class FindingApi:

     Methods for accessing eBay's FindingApi services
     '''

-    def __init__(self, service, pageNumber):
+    def __init__(self, service, target_idspc): # target ids per category
         self.service = [
             'findItemsAdvanced', 'findCompletedItems',
             'findItemsByKeywords', 'findItemsIneBayStores', 'findItemsByCategory',
             'findItemsByProduct'
         ][service] # Currently using only index 4, i.e., service = 4
-        self.pageNumber = list(range(1, pageNumber)) # 77 pgs will give equal weights to cats given call constraints
+        self.target_idspc = target_idspc

         # examples of additional params you may want to add:
         # 'itemFilter(0).value':'Used'; consider using this with the findCompletedItems call
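Review note: since get_data requests 100 entries per page (see the paginationInput.entriesPerPage param below), target_idspc effectively caps the number of pages fetched per category. A quick sketch of the arithmetic, using a hypothetical helper that is not part of this commit:

    import math

    def pages_needed(target_idspc, entries_per_page=100):
        # With entriesPerPage fixed at 100 in get_data, a per-category
        # target of 7692 ids takes ~77 calls, which appears to match the
        # old 77-page cap in the removed pageNumber logic.
        return math.ceil(target_idspc / entries_per_page)

    assert pages_needed(7692) == 77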
@@ -35,7 +35,7 @@ class FindingApi:
         # 'StartTimeNewest'
         # HideDuplicateItems

-    def get_data(self, category_id, i):
+    def get_data(self, category_id, target_idspc):

         '''
         Gets raw JSON data from FindingApi service call. Currently being used to
@@ -47,6 +47,8 @@ class FindingApi:
         Also consider using the exclude-duplicates param and possibly others.
         Research the eBay API docs to find candidates.
         '''
+        i = 1
+        ids = []
         params = {
             "OPERATION-NAME":self.service,
             "SECURITY-APPNAME":cfg.sec['SECURITY-APPNAME'],
@@ -63,16 +65,40 @@ class FindingApi:
             "itemFilter.value":'true'
         }

-        # TODO add try/excepts here
-        try:
-            response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
-                                    params=params, timeout=4)
-            response.raise_for_status()
+        while len(ids) < target_idspc:
+            try:
+                response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
+                                        params=params, timeout=7)
+                response.raise_for_status()

-        except requests.exceptions.RequestException:
-            print('connection error') # TODO decide how to handle exception
-        data = response.json()
-        return data
+            except requests.exceptions.RequestException: # appears this works; need to be able to continue where you left off, or use a better timeout?
+                print('connection error') # TODO decide how to handle exception
+                return ids
+            data = response.json()
+
+            for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
+                if item['itemId'][0] not in ids: # compare ids, not the whole item dict, so dupes are actually caught
+                    ids.append(item['itemId'][0])
+
+            ids = list(set(ids))
+            i += 1
+            params = {
+                "OPERATION-NAME":self.service,
+                "SECURITY-APPNAME":cfg.sec['SECURITY-APPNAME'],
+                "SERVICE-VERSION":"1.13.0",
+                "RESPONSE-DATA-FORMAT":"JSON",
+                "categoryId":category_id,
+                "paginationInput.entriesPerPage":"100",
+                "paginationInput.PageNumber":i,
+                "itemFilter(0).name":"Condition",
+                "itemFilter(0).value":"Used", # conditionId is recommended instead, but for some reason that doesn't work either;
+                # may not be necessary anyway if you can eliminate dupes. TODO still needs fixing. Used results are likely better than new items with low-quality brands and pics
+                "sortOrder":"StartTimeNewest",
+                "itemFilter.name":"HideDuplicateItems", # this isn't working, or is only working per page
+                "itemFilter.value":'true'
+            }
+
+        return ids

         # TODO add some other options to the Finding API call, such as filtering for used items only. This might give you a better dataset for training. Or maybe a mixture of new and used. Maybe
         # try to come up with a way to mathematically determine your odds of maximizing the number of pictures in your training set while reducing the number of useless images. Say, for example, if you took a
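Review note: returning ids on the first RequestException abandons all remaining pages, which is one way "full data not getting fetched" can happen. A minimal sketch of retrying the same page with backoff instead of bailing out; the helper name is hypothetical and this is not part of the commit:

    import time
    import requests

    def get_with_retries(url, params, attempts=3, timeout=7):
        # Retry the same page a few times with a growing delay before
        # giving up, so one transient failure doesn't end the whole loop.
        for attempt in range(attempts):
            try:
                response = requests.get(url, params=params, timeout=timeout)
                response.raise_for_status()
                return response.json()
            except requests.exceptions.RequestException:
                time.sleep(2 ** attempt)  # wait 1s, 2s, 4s between attempts
        return None  # caller can skip this page and continue with the next

Inside the while loop, a None return could then advance the page number and continue rather than returning a partial ids list.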
@@ -87,27 +113,22 @@ class FindingApi:
         Creates a 20-itemId list to use for the ShoppingApi
         call
         '''
-        pages = self.pageNumber
+        target_idspc = self.target_idspc

         itemid_results_list = []

         with open('cat_list.txt') as jf:
             cat_list = json.load(jf)

-        args = []
-        for category_id in cat_list:
-
-            bargs = [(category_id, i) for i in pages]
-            args.extend(bargs)
+        args = [(cat, target_idspc) for cat in cat_list]

         with concurrent.futures.ThreadPoolExecutor() as executor:
             for future in executor.map(lambda p: self.get_data(*p), args):
-                data = future
-
-                for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
-                    if item not in itemid_results_list:
-                        itemid_results_list.append(item['itemId'][0])
+                itemid_results_list.extend(future)

-        item_id_results = list(set(itemid_results_list))
+        with open('raw_ids.txt', 'w') as f:
+            json.dump(itemid_results_list, f)

         item_id_results = [','.join(itemid_results_list[n:n+20]) for n in list(range(0,
             len(itemid_results_list), 20))] # 20-ItemID list created to maximize dataset / decrease calls given call constraints
         return item_id_results
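Review note: the removed list(set(itemid_results_list)) line was the only cross-category dedupe, so duplicate ids appearing in more than one category can now survive into the 20-id batches; list(set(...)) also scrambles order. An order-preserving alternative, as a sketch rather than part of this commit:

    def dedupe_keep_order(ids):
        # dict preserves insertion order in Python 3.7+, so this drops
        # duplicates without shuffling the 20-id batches built afterwards.
        return list(dict.fromkeys(ids))

    print(dedupe_keep_order(['111', '222', '111', '333']))  # ['111', '222', '333']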
@@ -204,7 +225,7 @@ class ShoppingApi:
         service_dict

         fnd_srvc = input(str(service_dict) + "choose Finding call: (press 'enter' for default(4))")
-        pg_num = int(input('how many ids per cat? (7692 max)'))
+        target_idspc = int(input('how many ids per cat? (7692 max)'))

         optional_params = {
             "itemFilter(0).name":"Condition",
@@ -213,10 +234,10 @@ class ShoppingApi:

         if fnd_srvc != '':
             fnd_srvc = int(fnd_srvc)
-            finding = FindingApi(fnd_srvc, pg_num)
+            finding = FindingApi(fnd_srvc, target_idspc)
         else:
             fnd_srvc = 4
-            finding = FindingApi(fnd_srvc, pg_num)
+            finding = FindingApi(fnd_srvc, target_idspc)

         item_id_results = finding.get_ids_from_cats()
         with concurrent.futures.ThreadPoolExecutor() as executor: