modTimeFrom w dateutil datetime timedelta parser integration

This commit is contained in:
spbeach46 2021-11-21 16:05:20 -07:00
parent 7cee062b78
commit eb780faf40


@@ -1,4 +1,8 @@
import os
from datetime import datetime, timedelta
import dateutil
from dateutil import parser
import pytz
import pdb
from io import StringIO
import numpy as np
@@ -21,56 +25,53 @@ class FindingApi:
Methods for accessing eBay's FindingApi services
'''
def __init__(self, service, target_idspc): #target ids per cat
def __init__(self, service, idspc):
self.service = [
'findItemsAdvanced', 'findCompletedItems',
'findItemsByKeywords', 'findItemsIneBayStores', 'findItemsByCategory',
'findItemsByProduct'
][service] # Currently using only index 4, i.e., service = 4
self.target_idspc = target_idspc
# examples of additional params you may want to add:
self.idspc = idspc # examples of additional params you may want to add:
# 'itemFilter(0).value':'Used' consider using this with findCompletedItems call
# 'itemFilter(1).name':'ListingType'
# 'itemFilter(1).value':'AuctionWithBIN'
# 'StartTimeNewest'
# HideDuplicateItems
def get_data(self, category_id, target_idspc):
def get_data(self, category_id, idspc):
'''
Gets raw JSON data from FindingApi service call. Currently being used to
get itemIDs from categories;
'''
# startTime = dateutil.parser.isoparse( startTime )
# now = datetime.datetime.now(tz=pytz.UTC)
# days_on_site = (now - startTime).days # as int
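# (note: with the new `from datetime import datetime` import above, the second
# commented line would presumably be datetime.now(tz=pytz.UTC) rather than
# datetime.datetime.now(tz=pytz.UTC))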
'''
consider using the sortOrder param to update by the latest listings first.
Also consider using the exclude duplicates param and possibly others.
Research the eBay API docs to find candidates.
'''
i = 1
ids = []
modTimeFrom = datetime.now() - timedelta(seconds=5) # initialize modTimeFrom value
i = 1
params = {
"OPERATION-NAME":self.service,
"SECURITY-APPNAME":cfg.sec['SECURITY-APPNAME'],
"SERVICE-VERSION":"1.13.0",
"RESPONSE-DATA-FORMAT":"JSON",
"categoryId":category_id,
"paginationInput.entriesPerPage":"100",
"paginationInput.entriesPerPage":"20",
"paginationInput.PageNumber":i,
"itemFilter(0).name":"Condition",
"itemFilter(0).value":"Used", # recommended is conditionId instead but for some reason that doesn't work either
# but may not be necessary anyways if you can eliminate dupes. TODO Still need to fix to work. Results are likely better than new items w/ shitty brands and pics
"itemFilter(0).value":"Used",
"itemFilter.name":"HideDuplicateItems",
"itemFilter.value":"true",
"sortOrder":"StartTimeNewest",
"itemFilter.name":"HideDuplicateItems", # this isn't working or is only working per page
"itemfilter.value":'true'
}
while len(ids) < target_idspc: # target_ids(per category)
# "itemFilter.name(2)":"modTimeFrom",
# "itemFilter.value(2)":modTimeFrom,
while len(ids) < idspc:
try:
print(len(ids))
print(params['paginationInput.PageNumber'])
input('press enter to continue')
response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
params=params, timeout=24)
response.raise_for_status()
@@ -80,33 +81,39 @@ class FindingApi:
return ids
try:
data = response.json()
itemSearchURL = data['findItemsByCategoryResponse'][0]['itemSearchURL'][0]
modTimeFrom = data['findItemsByCategoryResponse'][0]['searchResult'][0]['item'][-1]['listingInfo'][0]['startTime'][0]
modTimeFrom = dateutil.parser.isoparse( modTimeFrom )
modTimeFrom = modTimeFrom - timedelta(seconds=5) # TODO NEED BACK TO GMT FORMAT
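# (sketch for the TODO above, unverified: convert back to the ISO-8601/GMT string
# format the API returns before reusing the value, e.g.
# modTimeFrom = modTimeFrom.astimezone(pytz.UTC).strftime('%Y-%m-%dT%H:%M:%S.000Z'))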
for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
# if item not in ids:
ids.append(item['itemId'][0])
# ids = list(set(ids))
i += 1
params = {
"OPERATION-NAME":self.service,
"SECURITY-APPNAME":cfg.sec['SECURITY-APPNAME'],
"SERVICE-VERSION":"1.13.0",
"RESPONSE-DATA-FORMAT":"JSON",
"categoryId":category_id,
"paginationInput.entriesPerPage":"100",
"paginationInput.PageNumber":i,
"itemFilter(0).name":"Condition",
"itemFilter(0).value":"Used", # recommended is conditionId instead but for some reason that doesn't work either
# but may not be necessary anyways if you can eliminate dupes. TODO Still need to fix to work. Results are likely better than new items w/ shitty brands and pics
"sortOrder":"StartTimeNewest",
"itemFilter.name":"HideDuplicateItems", # this isn't working or is only working per page
"itemfilter.value":'true'
}
#ids = list(set(ids))
except (AttributeError, KeyError):
print('AttributeError or KeyError. Exiting')
print(response.json())
return ids
return ids, data
input('press enter to continue')
i+=1
params = {
"OPERATION-NAME":self.service,
"SECURITY-APPNAME":cfg.sec['SECURITY-APPNAME'],
"SERVICE-VERSION":"1.13.0",
"RESPONSE-DATA-FORMAT":"JSON",
"categoryId":category_id,
"paginationInput.entriesPerPage":"20",
"paginationInput.PageNumber":i,
"itemFilter(0).name":"Condition",
"itemFilter(0).value":"Used",
"itemFilter.name":"HideDuplicateItems",
"itemFilter.value":"true",
"sortOrder":"StartTimeNewest",
}
return ids, data, modTimeFrom, itemSearchURL
# TODO add some other options to finding call api such as for possibly filtering for used items only. This might give you a better dataset for training. Or maybe a mixture of new and used. Maybe
# try and come up with a way to mathematically determine your odds of maximizing the number of pictures in your training set while reducing the number of useless images. Say for example, if you took a
@@ -121,25 +128,32 @@ class FindingApi:
Creates a 20-itemId list to use for the ShoppingApi
call
'''
target_idspc = self.target_idspc
# target_idspc = self.target_idspc
idspc = self.idspc
itemid_results_list = []
with open('cat_list.txt') as jf:
cat_list = json.load(jf)
args = [(cat, target_idspc) for cat in cat_list]
for cat in cat_list:
args = [(cat, idspc) for cat in cat_list]
with concurrent.futures.ThreadPoolExecutor() as executor:
for future in executor.map(lambda p: self.get_data(*p), args):
itemid_results_list.extend(future)
with concurrent.futures.ThreadPoolExecutor() as executor:
for future in executor.map(lambda p: self.get_data(*p), args):
itemid_results_list.extend(future)
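# (note: get_data() now returns an (ids, data, modTimeFrom, itemSearchURL) tuple,
# so extend(future) also pulls the non-ID values into this list; the intent is
# presumably itemid_results_list.extend(future[0]))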
print(len(itemid_results_list))
a = list(set(itemid_results_list))
print(len(a))
input('press enter to continue')
with open('raw_ids.txt', 'w') as f:
json.dump(itemid_results_list, f)
item_id_results = [','.join(itemid_results_list[n:n+20]) for n in list(range(0,
len(itemid_results_list), 20))] # 20-ItemID list created to maximize dataset/decrease calls given call constraints
return item_id_results
return item_id_results, itemid_results_list
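# (illustration with hypothetical ids: item_id_results is a list of comma-joined
# strings of up to 20 itemIds each, e.g. ['110001,110002,...,110020', ...], one
# string per ShoppingApi request, while itemid_results_list keeps the flat list)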
# TODO during your try except conditionals just check the csv files. At the end you can create sets. You can create another condition that says if the final set is smaller than 100k then you can call finding
# service on more pages (but only pages you haven't tried) and repeat the search process.