threaded cat loop page attempt 2 with modTimeFrom

This commit is contained in:
spbeach46 2021-11-21 12:47:04 -07:00
parent 1faa4e86fd
commit 7cee062b78

View File

@@ -1,4 +1,5 @@
import os
import pdb
from io import StringIO
import numpy as np
import concurrent.futures
@@ -62,43 +63,50 @@ class FindingApi:
            # but may not be necessary anyways if you can eliminate dupes. TODO Still need to fix to work. Results are likely better than new items w/ shitty brands and pics
"sortOrder":"StartTimeNewest",
"itemFilter.name":"HideDuplicateItems", # this isn't working or is only working per page
"itemFilter.value":'true'
"itemfilter.value":'true'
}
while len(ids) < target_idspc:
while len(ids) < target_idspc: # target_ids(per category)
try:
print(len(ids))
print(params['paginationInput.PageNumber'])
input('press enter to continue')
response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
params=params, timeout=7)
params=params, timeout=24)
response.raise_for_status()
except requests.exceptions.RequestException: # appears this works need to be able to continue where you left off or use better timeout?
print('connection error') #TODO DECIDE HOW TO HANDLE EXCEPTION
print('connection error')
return ids
data = response.json()
try:
data = response.json()
for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
if item not in ids:
for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
# if item not in ids:
ids.append(item['itemId'][0])
ids = list(set(ids))
i += 1
params = {
"OPERATION-NAME":self.service,
"SECURITY-APPNAME":cfg.sec['SECURITY-APPNAME'],
"SERVICE-VERSION":"1.13.0",
"RESPONSE-DATA-FORMAT":"JSON",
"categoryId":category_id,
"paginationInput.entriesPerPage":"100",
"paginationInput.PageNumber":i,
"itemFilter(0).name":"Condition",
"itemFilter(0).value":"Used", # recommended is conditionId instead but for some reason that doesn't work either
                        # but may not be necessary anyways if you can eliminate dupes. TODO Still need to fix to work. Results are likely better than new items w/ shitty brands and pics
"sortOrder":"StartTimeNewest",
"itemFilter.name":"HideDuplicateItems", # this isn't working or is only working per page
"itemFilter.value":'true'
}
# ids = list(set(ids))
i += 1
params = {
"OPERATION-NAME":self.service,
"SECURITY-APPNAME":cfg.sec['SECURITY-APPNAME'],
"SERVICE-VERSION":"1.13.0",
"RESPONSE-DATA-FORMAT":"JSON",
"categoryId":category_id,
"paginationInput.entriesPerPage":"100",
"paginationInput.PageNumber":i,
"itemFilter(0).name":"Condition",
"itemFilter(0).value":"Used", # recommended is conditionId instead but for some reason that doesn't work either
                            # but may not be necessary anyways if you can eliminate dupes. TODO Still need to fix to work. Results are likely better than new items w/ shitty brands and pics
"sortOrder":"StartTimeNewest",
"itemFilter.name":"HideDuplicateItems", # this isn't working or is only working per page
"itemfilter.value":'true'
}
except (AttributeError, KeyError):
print('AttributeError or KeyError. Exiting')
return ids
return ids
return ids, data
# TODO add some other options to finding call api such as for possibly filtering for used items only. This might give you a better dataset for training. Or maybe a mixture of new and used. Maybe
# try and come up with a way to mathematically determine your odds of maximizing the number of pictures in your training set while reducing the number of useless images. Say for example, if you took a